From ae540c55170c19c9faa868ccb43f8764a81e3fdd Mon Sep 17 00:00:00 2001 From: flashlan Date: Wed, 14 Jun 2023 18:47:28 -0300 Subject: [PATCH] added s3 functions --- compareDBs.ipynb | 275 ++++++++++++++++++++--------------------------- 1 file changed, 118 insertions(+), 157 deletions(-) diff --git a/compareDBs.ipynb b/compareDBs.ipynb index 7082ef4..64419cd 100644 --- a/compareDBs.ipynb +++ b/compareDBs.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 1, "id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a", "metadata": { "tags": [] @@ -59,17 +59,14 @@ "False" ] }, - "execution_count": 12, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import configparser\n", - "\n", - "# import pymongo\n", "import io\n", - "import time\n", "import timeit\n", "from datetime import datetime\n", "\n", @@ -138,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254", "metadata": {}, "outputs": [ @@ -176,86 +173,86 @@ " \n", " \n", " \n", - " 0\n", - " 7730801\n", - " 2023-01-02 15:58:45\n", - " 1672675140000000000\n", - " 2023-01-02 15:59:00\n", - " 1.065995\n", - " 1.066035\n", - " 1.065930\n", - " 1.066070\n", - " 57\n", + " 999995\n", + " 7984748\n", + " 2023-03-03 18:13:30\n", + " 1677867225000000000\n", + " 2023-03-03 18:13:45\n", + " 1.062695\n", + " 1.062635\n", + " 1.062630\n", + " 1.062700\n", + " 64\n", " \n", " \n", - " 1\n", - " 7730802\n", - " 2023-01-02 15:59:00\n", - " 1672675155000000000\n", - " 2023-01-02 15:59:15\n", - " 1.066055\n", - " 1.066085\n", - " 1.066005\n", - " 1.066115\n", - " 52\n", + " 999996\n", + " 7984749\n", + " 2023-03-03 18:13:45\n", + " 1677867240000000000\n", + " 2023-03-03 18:14:00\n", + " 1.062645\n", + " 1.062650\n", + " 1.062625\n", + " 1.062650\n", + " 43\n", " \n", " \n", - " 2\n", - " 7730803\n", - " 2023-01-02 15:59:15\n", - " 1672675170000000000\n", - " 2023-01-02 15:59:30\n", - " 1.066080\n", - " 1.066025\n", - " 1.066025\n", - " 1.066110\n", - " 57\n", + " 999997\n", + " 7984750\n", + " 2023-03-03 18:14:00\n", + " 1677867255000000000\n", + " 2023-03-03 18:14:15\n", + " 1.062640\n", + " 1.062625\n", + " 1.062620\n", + " 1.062665\n", + " 47\n", " \n", " \n", - " 3\n", - " 7730804\n", - " 2023-01-02 15:59:30\n", - " 1672675185000000000\n", - " 2023-01-02 15:59:45\n", - " 1.065980\n", - " 1.065985\n", - " 1.065885\n", - " 1.066045\n", - " 64\n", + " 999998\n", + " 7984751\n", + " 2023-03-03 18:14:15\n", + " 1677867270000000000\n", + " 2023-03-03 18:14:30\n", + " 1.062625\n", + " 1.062535\n", + " 1.062535\n", + " 1.062645\n", + " 43\n", " \n", " \n", - " 4\n", - " 7730805\n", - " 2023-01-02 15:59:45\n", - " 1672675200000000000\n", - " 2023-01-02 16:00:00\n", - " 1.065975\n", - " 1.066055\n", - " 1.065830\n", - " 1.066055\n", - " 50\n", + " 999999\n", + " 7984752\n", + " 2023-03-03 18:14:30\n", + " 1677867285000000000\n", + " 2023-03-03 18:14:45\n", + " 1.062535\n", + " 1.062520\n", + " 1.062520\n", + " 1.062580\n", + " 59\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id from at to \\\n", - "0 7730801 2023-01-02 15:58:45 1672675140000000000 2023-01-02 15:59:00 \n", - "1 7730802 2023-01-02 15:59:00 1672675155000000000 2023-01-02 15:59:15 \n", - "2 7730803 2023-01-02 15:59:15 1672675170000000000 2023-01-02 15:59:30 \n", - "3 7730804 2023-01-02 15:59:30 1672675185000000000 2023-01-02 15:59:45 \n", - "4 7730805 2023-01-02 15:59:45 1672675200000000000 2023-01-02 16:00:00 \n", + " id from at \\\n", + "999995 7984748 2023-03-03 18:13:30 1677867225000000000 \n", + "999996 7984749 2023-03-03 18:13:45 1677867240000000000 \n", + "999997 7984750 2023-03-03 18:14:00 1677867255000000000 \n", + "999998 7984751 2023-03-03 18:14:15 1677867270000000000 \n", + "999999 7984752 2023-03-03 18:14:30 1677867285000000000 \n", "\n", - " open close min max volume \n", - "0 1.065995 1.066035 1.065930 1.066070 57 \n", - "1 1.066055 1.066085 1.066005 1.066115 52 \n", - "2 1.066080 1.066025 1.066025 1.066110 57 \n", - "3 1.065980 1.065985 1.065885 1.066045 64 \n", - "4 1.065975 1.066055 1.065830 1.066055 50 " + " to open close min max volume \n", + "999995 2023-03-03 18:13:45 1.062695 1.062635 1.062630 1.062700 64 \n", + "999996 2023-03-03 18:14:00 1.062645 1.062650 1.062625 1.062650 43 \n", + "999997 2023-03-03 18:14:15 1.062640 1.062625 1.062620 1.062665 47 \n", + "999998 2023-03-03 18:14:30 1.062625 1.062535 1.062535 1.062645 43 \n", + "999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 " ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -264,7 +261,7 @@ "# %%time\n", "# Load Dataset\n", "df = pd.read_csv(\"out.csv\", index_col=0)\n", - "df.head()" + "df.tail()" ] }, { @@ -294,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "27de1ec8-4de1-440a-b555-b4a46c5ef7ce", "metadata": {}, "outputs": [], @@ -308,6 +305,7 @@ "cell_type": "markdown", "id": "4a8d5703-9bc9-4d38-83ff-457159304d58", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -316,7 +314,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 9, "id": "c3202bbb-2655-45b2-b166-9f45a3ef854c", "metadata": { "tags": [] @@ -328,7 +326,7 @@ "'Database created'" ] }, - "execution_count": 22, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -697,6 +695,7 @@ "cell_type": "markdown", "id": "b9ddfdc6-c899-4f6c-9b4e-8ec6ab6d7e05", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -735,14 +734,6 @@ "# testar função" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "e173a45b-60a1-4c33-946e-ccf98bf8e97f", - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 18, @@ -811,65 +802,6 @@ "psql_read_execution_time = stop - start" ] }, - { - "cell_type": "code", - "execution_count": 10, - "id": "a7883c4d-4609-4380-8a45-246b7ca2f9c5", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'engine' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m:2\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'engine' is not defined" - ] - } - ], - "source": [ - "# %%time\n", - "# # Write\n", - "# conn = engine.raw_connection()\n", - "# cur = conn.cursor()\n", - "# output = io.StringIO()\n", - "# df.to_csv(output, sep=\"\\t\", header=False, index=False)\n", - "# output.seek(0)\n", - "# contents = output.getvalue()\n", - "\n", - "# cur.copy_from(output, \"comparedbs\") # , null=\"\") # null values become ''\n", - "# conn.commit()\n", - "# cur.close()\n", - "# conn.close()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "73de4294-1284-49b0-b31e-45db6e835877", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e37a93e1-fc0e-4d27-9e16-dca6c8aea324", - "metadata": {}, - "outputs": [], - "source": [ - "start = time.time()\n", - "# %%time\n", - "# Read\n", - "df = pd.read_sql_query('select * from \"comparedbs\"', con=engine)\n", - "end = time.time()\n", - "postgresql_read_time = exec_time(start, end)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -891,7 +823,7 @@ }, "outputs": [], "source": [ - "df.head()" + "# df.head()" ] }, { @@ -906,8 +838,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "60a990e2-4607-4654-84ec-17d4985adae2", + "execution_count": 10, + "id": "7c7022bf-9c3b-400a-9045-b089483f05ad", "metadata": { "tags": [] }, @@ -915,32 +847,63 @@ "source": [ "# fazer sem funçao para ver se melhora\n", "# verifique se esta no ssd os arquivos da pasta git\n", - "def main():\n", + "def s3Connect():\n", " client = Minio(\n", " S3MinioUrl,\n", " secure=False,\n", " region=S3MinioRegion,\n", - " access_key=\"MatMPA7NyHltz7DQ\",\n", - " secret_key=\"SO1IG5iBPSjNPZanYUaHCLcoSbjphLCP\",\n", + " access_key=S3MinioUser,\n", + " secret_key=S3MinioKey,\n", " )\n", + " return client\n", + "\n", "\n", - " # Make bucket if not exist.\n", - " found = client.bucket_exists(\"data\")\n", + "def s3CreateBucket(bucketName=\"data\"):\n", + " client = s3Connect()\n", + " found = client.bucket_exists(bucketName)\n", " if not found:\n", - " client.make_bucket(\"data\")\n", + " return client.make_bucket(bucketName)\n", " else:\n", - " print(\"Bucket 'data' already exists\")\n", + " return \"Bucket '{}' already exists\".format(bucketName)\n", + "\n", "\n", - " # Upload\n", + "def s3uploadCsv():\n", + " client = s3Connect()\n", " client.fput_object(\n", " \"data\",\n", " \"data.parquet\",\n", " \"data/data.parquet\",\n", " )\n", - " # print(\n", - " # \"'data/data.parquet' is successfully uploaded as \"\n", - " # \"object 'data.parquet' to bucket 'data'.\"\n", - " # )" + " return (\n", + " \"'data/data.parquet' is successfully uploaded as \"\n", + " \"object 'data.parquet' to bucket 'data'.\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "cd7fe012-9eee-4f91-8c07-8e0148633766", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def main():\n", + " # Insert to db and benchmark time\n", + " df.to_parquet(\"data/data.parquet\")\n", + " s3CreateBucket()\n", + " start = timeit.default_timer()\n", + " s3uploadCsv()\n", + " stop = timeit.default_timer()\n", + " s3_write_execution_time = stop - start\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " try:\n", + " main()\n", + " except S3Error as exc:\n", + " print(\"error occurred.\", exc)" ] }, { @@ -1136,8 +1099,6 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "\n", "np.bool = np.bool_\n", "from qpython import qconnection" ]