Browse Source

added s3 functions

master
flashlan 3 years ago
parent
commit
ae540c5517
  1. 275
      compareDBs.ipynb

275
compareDBs.ipynb

@ -47,7 +47,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 1,
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a", "id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -59,17 +59,14 @@
"False" "False"
] ]
}, },
"execution_count": 12, "execution_count": 1,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"import configparser\n", "import configparser\n",
"\n",
"# import pymongo\n",
"import io\n", "import io\n",
"import time\n",
"import timeit\n", "import timeit\n",
"from datetime import datetime\n", "from datetime import datetime\n",
"\n", "\n",
@ -138,7 +135,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 5,
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254", "id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -176,86 +173,86 @@
" </thead>\n", " </thead>\n",
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>999995</th>\n",
" <td>7730801</td>\n", " <td>7984748</td>\n",
" <td>2023-01-02 15:58:45</td>\n", " <td>2023-03-03 18:13:30</td>\n",
" <td>1672675140000000000</td>\n", " <td>1677867225000000000</td>\n",
" <td>2023-01-02 15:59:00</td>\n", " <td>2023-03-03 18:13:45</td>\n",
" <td>1.065995</td>\n", " <td>1.062695</td>\n",
" <td>1.066035</td>\n", " <td>1.062635</td>\n",
" <td>1.065930</td>\n", " <td>1.062630</td>\n",
" <td>1.066070</td>\n", " <td>1.062700</td>\n",
" <td>57</td>\n", " <td>64</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>999996</th>\n",
" <td>7730802</td>\n", " <td>7984749</td>\n",
" <td>2023-01-02 15:59:00</td>\n", " <td>2023-03-03 18:13:45</td>\n",
" <td>1672675155000000000</td>\n", " <td>1677867240000000000</td>\n",
" <td>2023-01-02 15:59:15</td>\n", " <td>2023-03-03 18:14:00</td>\n",
" <td>1.066055</td>\n", " <td>1.062645</td>\n",
" <td>1.066085</td>\n", " <td>1.062650</td>\n",
" <td>1.066005</td>\n", " <td>1.062625</td>\n",
" <td>1.066115</td>\n", " <td>1.062650</td>\n",
" <td>52</td>\n", " <td>43</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>999997</th>\n",
" <td>7730803</td>\n", " <td>7984750</td>\n",
" <td>2023-01-02 15:59:15</td>\n", " <td>2023-03-03 18:14:00</td>\n",
" <td>1672675170000000000</td>\n", " <td>1677867255000000000</td>\n",
" <td>2023-01-02 15:59:30</td>\n", " <td>2023-03-03 18:14:15</td>\n",
" <td>1.066080</td>\n", " <td>1.062640</td>\n",
" <td>1.066025</td>\n", " <td>1.062625</td>\n",
" <td>1.066025</td>\n", " <td>1.062620</td>\n",
" <td>1.066110</td>\n", " <td>1.062665</td>\n",
" <td>57</td>\n", " <td>47</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>999998</th>\n",
" <td>7730804</td>\n", " <td>7984751</td>\n",
" <td>2023-01-02 15:59:30</td>\n", " <td>2023-03-03 18:14:15</td>\n",
" <td>1672675185000000000</td>\n", " <td>1677867270000000000</td>\n",
" <td>2023-01-02 15:59:45</td>\n", " <td>2023-03-03 18:14:30</td>\n",
" <td>1.065980</td>\n", " <td>1.062625</td>\n",
" <td>1.065985</td>\n", " <td>1.062535</td>\n",
" <td>1.065885</td>\n", " <td>1.062535</td>\n",
" <td>1.066045</td>\n", " <td>1.062645</td>\n",
" <td>64</td>\n", " <td>43</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>999999</th>\n",
" <td>7730805</td>\n", " <td>7984752</td>\n",
" <td>2023-01-02 15:59:45</td>\n", " <td>2023-03-03 18:14:30</td>\n",
" <td>1672675200000000000</td>\n", " <td>1677867285000000000</td>\n",
" <td>2023-01-02 16:00:00</td>\n", " <td>2023-03-03 18:14:45</td>\n",
" <td>1.065975</td>\n", " <td>1.062535</td>\n",
" <td>1.066055</td>\n", " <td>1.062520</td>\n",
" <td>1.065830</td>\n", " <td>1.062520</td>\n",
" <td>1.066055</td>\n", " <td>1.062580</td>\n",
" <td>50</td>\n", " <td>59</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" id from at to \\\n", " id from at \\\n",
"0 7730801 2023-01-02 15:58:45 1672675140000000000 2023-01-02 15:59:00 \n", "999995 7984748 2023-03-03 18:13:30 1677867225000000000 \n",
"1 7730802 2023-01-02 15:59:00 1672675155000000000 2023-01-02 15:59:15 \n", "999996 7984749 2023-03-03 18:13:45 1677867240000000000 \n",
"2 7730803 2023-01-02 15:59:15 1672675170000000000 2023-01-02 15:59:30 \n", "999997 7984750 2023-03-03 18:14:00 1677867255000000000 \n",
"3 7730804 2023-01-02 15:59:30 1672675185000000000 2023-01-02 15:59:45 \n", "999998 7984751 2023-03-03 18:14:15 1677867270000000000 \n",
"4 7730805 2023-01-02 15:59:45 1672675200000000000 2023-01-02 16:00:00 \n", "999999 7984752 2023-03-03 18:14:30 1677867285000000000 \n",
"\n", "\n",
" open close min max volume \n", " to open close min max volume \n",
"0 1.065995 1.066035 1.065930 1.066070 57 \n", "999995 2023-03-03 18:13:45 1.062695 1.062635 1.062630 1.062700 64 \n",
"1 1.066055 1.066085 1.066005 1.066115 52 \n", "999996 2023-03-03 18:14:00 1.062645 1.062650 1.062625 1.062650 43 \n",
"2 1.066080 1.066025 1.066025 1.066110 57 \n", "999997 2023-03-03 18:14:15 1.062640 1.062625 1.062620 1.062665 47 \n",
"3 1.065980 1.065985 1.065885 1.066045 64 \n", "999998 2023-03-03 18:14:30 1.062625 1.062535 1.062535 1.062645 43 \n",
"4 1.065975 1.066055 1.065830 1.066055 50 " "999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 "
] ]
}, },
"execution_count": 4, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -264,7 +261,7 @@
"# %%time\n", "# %%time\n",
"# Load Dataset\n", "# Load Dataset\n",
"df = pd.read_csv(\"out.csv\", index_col=0)\n", "df = pd.read_csv(\"out.csv\", index_col=0)\n",
"df.head()" "df.tail()"
] ]
}, },
{ {
@ -294,7 +291,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 6,
"id": "27de1ec8-4de1-440a-b555-b4a46c5ef7ce", "id": "27de1ec8-4de1-440a-b555-b4a46c5ef7ce",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -308,6 +305,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"id": "4a8d5703-9bc9-4d38-83ff-457159304d58", "id": "4a8d5703-9bc9-4d38-83ff-457159304d58",
"metadata": { "metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": [] "tags": []
}, },
"source": [ "source": [
@ -316,7 +314,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 9,
"id": "c3202bbb-2655-45b2-b166-9f45a3ef854c", "id": "c3202bbb-2655-45b2-b166-9f45a3ef854c",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -328,7 +326,7 @@
"'Database created'" "'Database created'"
] ]
}, },
"execution_count": 22, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -697,6 +695,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"id": "b9ddfdc6-c899-4f6c-9b4e-8ec6ab6d7e05", "id": "b9ddfdc6-c899-4f6c-9b4e-8ec6ab6d7e05",
"metadata": { "metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": [] "tags": []
}, },
"source": [ "source": [
@ -735,14 +734,6 @@
"# testar função" "# testar função"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "e173a45b-60a1-4c33-946e-ccf98bf8e97f",
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 18,
@ -811,65 +802,6 @@
"psql_read_execution_time = stop - start" "psql_read_execution_time = stop - start"
] ]
}, },
{
"cell_type": "code",
"execution_count": 10,
"id": "a7883c4d-4609-4380-8a45-246b7ca2f9c5",
"metadata": {
"tags": []
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'engine' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m<timed exec>:2\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'engine' is not defined"
]
}
],
"source": [
"# %%time\n",
"# # Write\n",
"# conn = engine.raw_connection()\n",
"# cur = conn.cursor()\n",
"# output = io.StringIO()\n",
"# df.to_csv(output, sep=\"\\t\", header=False, index=False)\n",
"# output.seek(0)\n",
"# contents = output.getvalue()\n",
"\n",
"# cur.copy_from(output, \"comparedbs\") # , null=\"\") # null values become ''\n",
"# conn.commit()\n",
"# cur.close()\n",
"# conn.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "73de4294-1284-49b0-b31e-45db6e835877",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e37a93e1-fc0e-4d27-9e16-dca6c8aea324",
"metadata": {},
"outputs": [],
"source": [
"start = time.time()\n",
"# %%time\n",
"# Read\n",
"df = pd.read_sql_query('select * from \"comparedbs\"', con=engine)\n",
"end = time.time()\n",
"postgresql_read_time = exec_time(start, end)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@ -891,7 +823,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"df.head()" "# df.head()"
] ]
}, },
{ {
@ -906,8 +838,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 10,
"id": "60a990e2-4607-4654-84ec-17d4985adae2", "id": "7c7022bf-9c3b-400a-9045-b089483f05ad",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
@ -915,32 +847,63 @@
"source": [ "source": [
"# fazer sem funçao para ver se melhora\n", "# fazer sem funçao para ver se melhora\n",
"# verifique se esta no ssd os arquivos da pasta git\n", "# verifique se esta no ssd os arquivos da pasta git\n",
"def main():\n", "def s3Connect():\n",
" client = Minio(\n", " client = Minio(\n",
" S3MinioUrl,\n", " S3MinioUrl,\n",
" secure=False,\n", " secure=False,\n",
" region=S3MinioRegion,\n", " region=S3MinioRegion,\n",
" access_key=\"MatMPA7NyHltz7DQ\",\n", " access_key=S3MinioUser,\n",
" secret_key=\"SO1IG5iBPSjNPZanYUaHCLcoSbjphLCP\",\n", " secret_key=S3MinioKey,\n",
" )\n", " )\n",
" return client\n",
"\n",
"\n", "\n",
" # Make bucket if not exist.\n", "def s3CreateBucket(bucketName=\"data\"):\n",
" found = client.bucket_exists(\"data\")\n", " client = s3Connect()\n",
" found = client.bucket_exists(bucketName)\n",
" if not found:\n", " if not found:\n",
" client.make_bucket(\"data\")\n", " return client.make_bucket(bucketName)\n",
" else:\n", " else:\n",
" print(\"Bucket 'data' already exists\")\n", " return \"Bucket '{}' already exists\".format(bucketName)\n",
"\n", "\n",
" # Upload\n", "\n",
"def s3uploadCsv():\n",
" client = s3Connect()\n",
" client.fput_object(\n", " client.fput_object(\n",
" \"data\",\n", " \"data\",\n",
" \"data.parquet\",\n", " \"data.parquet\",\n",
" \"data/data.parquet\",\n", " \"data/data.parquet\",\n",
" )\n", " )\n",
" # print(\n", " return (\n",
" # \"'data/data.parquet' is successfully uploaded as \"\n", " \"'data/data.parquet' is successfully uploaded as \"\n",
" # \"object 'data.parquet' to bucket 'data'.\"\n", " \"object 'data.parquet' to bucket 'data'.\"\n",
" # )" " )"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "cd7fe012-9eee-4f91-8c07-8e0148633766",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def main():\n",
" # Insert to db and benchmark time\n",
" df.to_parquet(\"data/data.parquet\")\n",
" s3CreateBucket()\n",
" start = timeit.default_timer()\n",
" s3uploadCsv()\n",
" stop = timeit.default_timer()\n",
" s3_write_execution_time = stop - start\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" try:\n",
" main()\n",
" except S3Error as exc:\n",
" print(\"error occurred.\", exc)"
] ]
}, },
{ {
@ -1136,8 +1099,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import numpy as np\n",
"\n",
"np.bool = np.bool_\n", "np.bool = np.bool_\n",
"from qpython import qconnection" "from qpython import qconnection"
] ]

Loading…
Cancel
Save