Browse Source

added s3 functions

master
flashlan 3 years ago
parent
commit
ae540c5517
  1. 275
      compareDBs.ipynb

275
compareDBs.ipynb

@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 1,
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a",
"metadata": {
"tags": []
@ -59,17 +59,14 @@
"False"
]
},
"execution_count": 12,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import configparser\n",
"\n",
"# import pymongo\n",
"import io\n",
"import time\n",
"import timeit\n",
"from datetime import datetime\n",
"\n",
@ -138,7 +135,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254",
"metadata": {},
"outputs": [
@ -176,86 +173,86 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7730801</td>\n",
" <td>2023-01-02 15:58:45</td>\n",
" <td>1672675140000000000</td>\n",
" <td>2023-01-02 15:59:00</td>\n",
" <td>1.065995</td>\n",
" <td>1.066035</td>\n",
" <td>1.065930</td>\n",
" <td>1.066070</td>\n",
" <td>57</td>\n",
" <th>999995</th>\n",
" <td>7984748</td>\n",
" <td>2023-03-03 18:13:30</td>\n",
" <td>1677867225000000000</td>\n",
" <td>2023-03-03 18:13:45</td>\n",
" <td>1.062695</td>\n",
" <td>1.062635</td>\n",
" <td>1.062630</td>\n",
" <td>1.062700</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7730802</td>\n",
" <td>2023-01-02 15:59:00</td>\n",
" <td>1672675155000000000</td>\n",
" <td>2023-01-02 15:59:15</td>\n",
" <td>1.066055</td>\n",
" <td>1.066085</td>\n",
" <td>1.066005</td>\n",
" <td>1.066115</td>\n",
" <td>52</td>\n",
" <th>999996</th>\n",
" <td>7984749</td>\n",
" <td>2023-03-03 18:13:45</td>\n",
" <td>1677867240000000000</td>\n",
" <td>2023-03-03 18:14:00</td>\n",
" <td>1.062645</td>\n",
" <td>1.062650</td>\n",
" <td>1.062625</td>\n",
" <td>1.062650</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7730803</td>\n",
" <td>2023-01-02 15:59:15</td>\n",
" <td>1672675170000000000</td>\n",
" <td>2023-01-02 15:59:30</td>\n",
" <td>1.066080</td>\n",
" <td>1.066025</td>\n",
" <td>1.066025</td>\n",
" <td>1.066110</td>\n",
" <td>57</td>\n",
" <th>999997</th>\n",
" <td>7984750</td>\n",
" <td>2023-03-03 18:14:00</td>\n",
" <td>1677867255000000000</td>\n",
" <td>2023-03-03 18:14:15</td>\n",
" <td>1.062640</td>\n",
" <td>1.062625</td>\n",
" <td>1.062620</td>\n",
" <td>1.062665</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7730804</td>\n",
" <td>2023-01-02 15:59:30</td>\n",
" <td>1672675185000000000</td>\n",
" <td>2023-01-02 15:59:45</td>\n",
" <td>1.065980</td>\n",
" <td>1.065985</td>\n",
" <td>1.065885</td>\n",
" <td>1.066045</td>\n",
" <td>64</td>\n",
" <th>999998</th>\n",
" <td>7984751</td>\n",
" <td>2023-03-03 18:14:15</td>\n",
" <td>1677867270000000000</td>\n",
" <td>2023-03-03 18:14:30</td>\n",
" <td>1.062625</td>\n",
" <td>1.062535</td>\n",
" <td>1.062535</td>\n",
" <td>1.062645</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7730805</td>\n",
" <td>2023-01-02 15:59:45</td>\n",
" <td>1672675200000000000</td>\n",
" <td>2023-01-02 16:00:00</td>\n",
" <td>1.065975</td>\n",
" <td>1.066055</td>\n",
" <td>1.065830</td>\n",
" <td>1.066055</td>\n",
" <td>50</td>\n",
" <th>999999</th>\n",
" <td>7984752</td>\n",
" <td>2023-03-03 18:14:30</td>\n",
" <td>1677867285000000000</td>\n",
" <td>2023-03-03 18:14:45</td>\n",
" <td>1.062535</td>\n",
" <td>1.062520</td>\n",
" <td>1.062520</td>\n",
" <td>1.062580</td>\n",
" <td>59</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id from at to \\\n",
"0 7730801 2023-01-02 15:58:45 1672675140000000000 2023-01-02 15:59:00 \n",
"1 7730802 2023-01-02 15:59:00 1672675155000000000 2023-01-02 15:59:15 \n",
"2 7730803 2023-01-02 15:59:15 1672675170000000000 2023-01-02 15:59:30 \n",
"3 7730804 2023-01-02 15:59:30 1672675185000000000 2023-01-02 15:59:45 \n",
"4 7730805 2023-01-02 15:59:45 1672675200000000000 2023-01-02 16:00:00 \n",
" id from at \\\n",
"999995 7984748 2023-03-03 18:13:30 1677867225000000000 \n",
"999996 7984749 2023-03-03 18:13:45 1677867240000000000 \n",
"999997 7984750 2023-03-03 18:14:00 1677867255000000000 \n",
"999998 7984751 2023-03-03 18:14:15 1677867270000000000 \n",
"999999 7984752 2023-03-03 18:14:30 1677867285000000000 \n",
"\n",
" open close min max volume \n",
"0 1.065995 1.066035 1.065930 1.066070 57 \n",
"1 1.066055 1.066085 1.066005 1.066115 52 \n",
"2 1.066080 1.066025 1.066025 1.066110 57 \n",
"3 1.065980 1.065985 1.065885 1.066045 64 \n",
"4 1.065975 1.066055 1.065830 1.066055 50 "
" to open close min max volume \n",
"999995 2023-03-03 18:13:45 1.062695 1.062635 1.062630 1.062700 64 \n",
"999996 2023-03-03 18:14:00 1.062645 1.062650 1.062625 1.062650 43 \n",
"999997 2023-03-03 18:14:15 1.062640 1.062625 1.062620 1.062665 47 \n",
"999998 2023-03-03 18:14:30 1.062625 1.062535 1.062535 1.062645 43 \n",
"999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 "
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@ -264,7 +261,7 @@
"# %%time\n",
"# Load Dataset\n",
"df = pd.read_csv(\"out.csv\", index_col=0)\n",
"df.head()"
"df.tail()"
]
},
{
@ -294,7 +291,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "27de1ec8-4de1-440a-b555-b4a46c5ef7ce",
"metadata": {},
"outputs": [],
@ -308,6 +305,7 @@
"cell_type": "markdown",
"id": "4a8d5703-9bc9-4d38-83ff-457159304d58",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -316,7 +314,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 9,
"id": "c3202bbb-2655-45b2-b166-9f45a3ef854c",
"metadata": {
"tags": []
@ -328,7 +326,7 @@
"'Database created'"
]
},
"execution_count": 22,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -697,6 +695,7 @@
"cell_type": "markdown",
"id": "b9ddfdc6-c899-4f6c-9b4e-8ec6ab6d7e05",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -735,14 +734,6 @@
"# testar função"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e173a45b-60a1-4c33-946e-ccf98bf8e97f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 18,
@ -811,65 +802,6 @@
"psql_read_execution_time = stop - start"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "a7883c4d-4609-4380-8a45-246b7ca2f9c5",
"metadata": {
"tags": []
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'engine' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m<timed exec>:2\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'engine' is not defined"
]
}
],
"source": [
"# %%time\n",
"# # Write\n",
"# conn = engine.raw_connection()\n",
"# cur = conn.cursor()\n",
"# output = io.StringIO()\n",
"# df.to_csv(output, sep=\"\\t\", header=False, index=False)\n",
"# output.seek(0)\n",
"# contents = output.getvalue()\n",
"\n",
"# cur.copy_from(output, \"comparedbs\") # , null=\"\") # null values become ''\n",
"# conn.commit()\n",
"# cur.close()\n",
"# conn.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "73de4294-1284-49b0-b31e-45db6e835877",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e37a93e1-fc0e-4d27-9e16-dca6c8aea324",
"metadata": {},
"outputs": [],
"source": [
"start = time.time()\n",
"# %%time\n",
"# Read\n",
"df = pd.read_sql_query('select * from \"comparedbs\"', con=engine)\n",
"end = time.time()\n",
"postgresql_read_time = exec_time(start, end)"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -891,7 +823,7 @@
},
"outputs": [],
"source": [
"df.head()"
"# df.head()"
]
},
{
@ -906,8 +838,8 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "60a990e2-4607-4654-84ec-17d4985adae2",
"execution_count": 10,
"id": "7c7022bf-9c3b-400a-9045-b089483f05ad",
"metadata": {
"tags": []
},
@ -915,32 +847,63 @@
"source": [
"# fazer sem funçao para ver se melhora\n",
"# verifique se esta no ssd os arquivos da pasta git\n",
"def main():\n",
"def s3Connect():\n",
" client = Minio(\n",
" S3MinioUrl,\n",
" secure=False,\n",
" region=S3MinioRegion,\n",
" access_key=\"MatMPA7NyHltz7DQ\",\n",
" secret_key=\"SO1IG5iBPSjNPZanYUaHCLcoSbjphLCP\",\n",
" access_key=S3MinioUser,\n",
" secret_key=S3MinioKey,\n",
" )\n",
" return client\n",
"\n",
"\n",
" # Make bucket if not exist.\n",
" found = client.bucket_exists(\"data\")\n",
"def s3CreateBucket(bucketName=\"data\"):\n",
" client = s3Connect()\n",
" found = client.bucket_exists(bucketName)\n",
" if not found:\n",
" client.make_bucket(\"data\")\n",
" return client.make_bucket(bucketName)\n",
" else:\n",
" print(\"Bucket 'data' already exists\")\n",
" return \"Bucket '{}' already exists\".format(bucketName)\n",
"\n",
"\n",
" # Upload\n",
"def s3uploadCsv():\n",
" client = s3Connect()\n",
" client.fput_object(\n",
" \"data\",\n",
" \"data.parquet\",\n",
" \"data/data.parquet\",\n",
" )\n",
" # print(\n",
" # \"'data/data.parquet' is successfully uploaded as \"\n",
" # \"object 'data.parquet' to bucket 'data'.\"\n",
" # )"
" return (\n",
" \"'data/data.parquet' is successfully uploaded as \"\n",
" \"object 'data.parquet' to bucket 'data'.\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "cd7fe012-9eee-4f91-8c07-8e0148633766",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def main():\n",
" # Insert to db and benchmark time\n",
" df.to_parquet(\"data/data.parquet\")\n",
" s3CreateBucket()\n",
" start = timeit.default_timer()\n",
" s3uploadCsv()\n",
" stop = timeit.default_timer()\n",
" s3_write_execution_time = stop - start\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" try:\n",
" main()\n",
" except S3Error as exc:\n",
" print(\"error occurred.\", exc)"
]
},
{
@ -1136,8 +1099,6 @@
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"np.bool = np.bool_\n",
"from qpython import qconnection"
]

Loading…
Cancel
Save