|
|
|
@ -47,7 +47,7 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 12, |
|
|
|
"execution_count": 1, |
|
|
|
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a", |
|
|
|
"metadata": { |
|
|
|
"tags": [] |
|
|
|
@ -59,17 +59,14 @@ |
|
|
|
"False" |
|
|
|
] |
|
|
|
}, |
|
|
|
"execution_count": 12, |
|
|
|
"execution_count": 1, |
|
|
|
"metadata": {}, |
|
|
|
"output_type": "execute_result" |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"import configparser\n", |
|
|
|
"\n", |
|
|
|
"# import pymongo\n", |
|
|
|
"import io\n", |
|
|
|
"import time\n", |
|
|
|
"import timeit\n", |
|
|
|
"from datetime import datetime\n", |
|
|
|
"\n", |
|
|
|
@ -138,7 +135,7 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 4, |
|
|
|
"execution_count": 5, |
|
|
|
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
@ -176,86 +173,86 @@ |
|
|
|
" </thead>\n", |
|
|
|
" <tbody>\n", |
|
|
|
" <tr>\n", |
|
|
|
" <th>0</th>\n", |
|
|
|
" <td>7730801</td>\n", |
|
|
|
" <td>2023-01-02 15:58:45</td>\n", |
|
|
|
" <td>1672675140000000000</td>\n", |
|
|
|
" <td>2023-01-02 15:59:00</td>\n", |
|
|
|
" <td>1.065995</td>\n", |
|
|
|
" <td>1.066035</td>\n", |
|
|
|
" <td>1.065930</td>\n", |
|
|
|
" <td>1.066070</td>\n", |
|
|
|
" <td>57</td>\n", |
|
|
|
" <th>999995</th>\n", |
|
|
|
" <td>7984748</td>\n", |
|
|
|
" <td>2023-03-03 18:13:30</td>\n", |
|
|
|
" <td>1677867225000000000</td>\n", |
|
|
|
" <td>2023-03-03 18:13:45</td>\n", |
|
|
|
" <td>1.062695</td>\n", |
|
|
|
" <td>1.062635</td>\n", |
|
|
|
" <td>1.062630</td>\n", |
|
|
|
" <td>1.062700</td>\n", |
|
|
|
" <td>64</td>\n", |
|
|
|
" </tr>\n", |
|
|
|
" <tr>\n", |
|
|
|
" <th>1</th>\n", |
|
|
|
" <td>7730802</td>\n", |
|
|
|
" <td>2023-01-02 15:59:00</td>\n", |
|
|
|
" <td>1672675155000000000</td>\n", |
|
|
|
" <td>2023-01-02 15:59:15</td>\n", |
|
|
|
" <td>1.066055</td>\n", |
|
|
|
" <td>1.066085</td>\n", |
|
|
|
" <td>1.066005</td>\n", |
|
|
|
" <td>1.066115</td>\n", |
|
|
|
" <td>52</td>\n", |
|
|
|
" <th>999996</th>\n", |
|
|
|
" <td>7984749</td>\n", |
|
|
|
" <td>2023-03-03 18:13:45</td>\n", |
|
|
|
" <td>1677867240000000000</td>\n", |
|
|
|
" <td>2023-03-03 18:14:00</td>\n", |
|
|
|
" <td>1.062645</td>\n", |
|
|
|
" <td>1.062650</td>\n", |
|
|
|
" <td>1.062625</td>\n", |
|
|
|
" <td>1.062650</td>\n", |
|
|
|
" <td>43</td>\n", |
|
|
|
" </tr>\n", |
|
|
|
" <tr>\n", |
|
|
|
" <th>2</th>\n", |
|
|
|
" <td>7730803</td>\n", |
|
|
|
" <td>2023-01-02 15:59:15</td>\n", |
|
|
|
" <td>1672675170000000000</td>\n", |
|
|
|
" <td>2023-01-02 15:59:30</td>\n", |
|
|
|
" <td>1.066080</td>\n", |
|
|
|
" <td>1.066025</td>\n", |
|
|
|
" <td>1.066025</td>\n", |
|
|
|
" <td>1.066110</td>\n", |
|
|
|
" <td>57</td>\n", |
|
|
|
" <th>999997</th>\n", |
|
|
|
" <td>7984750</td>\n", |
|
|
|
" <td>2023-03-03 18:14:00</td>\n", |
|
|
|
" <td>1677867255000000000</td>\n", |
|
|
|
" <td>2023-03-03 18:14:15</td>\n", |
|
|
|
" <td>1.062640</td>\n", |
|
|
|
" <td>1.062625</td>\n", |
|
|
|
" <td>1.062620</td>\n", |
|
|
|
" <td>1.062665</td>\n", |
|
|
|
" <td>47</td>\n", |
|
|
|
" </tr>\n", |
|
|
|
" <tr>\n", |
|
|
|
" <th>3</th>\n", |
|
|
|
" <td>7730804</td>\n", |
|
|
|
" <td>2023-01-02 15:59:30</td>\n", |
|
|
|
" <td>1672675185000000000</td>\n", |
|
|
|
" <td>2023-01-02 15:59:45</td>\n", |
|
|
|
" <td>1.065980</td>\n", |
|
|
|
" <td>1.065985</td>\n", |
|
|
|
" <td>1.065885</td>\n", |
|
|
|
" <td>1.066045</td>\n", |
|
|
|
" <td>64</td>\n", |
|
|
|
" <th>999998</th>\n", |
|
|
|
" <td>7984751</td>\n", |
|
|
|
" <td>2023-03-03 18:14:15</td>\n", |
|
|
|
" <td>1677867270000000000</td>\n", |
|
|
|
" <td>2023-03-03 18:14:30</td>\n", |
|
|
|
" <td>1.062625</td>\n", |
|
|
|
" <td>1.062535</td>\n", |
|
|
|
" <td>1.062535</td>\n", |
|
|
|
" <td>1.062645</td>\n", |
|
|
|
" <td>43</td>\n", |
|
|
|
" </tr>\n", |
|
|
|
" <tr>\n", |
|
|
|
" <th>4</th>\n", |
|
|
|
" <td>7730805</td>\n", |
|
|
|
" <td>2023-01-02 15:59:45</td>\n", |
|
|
|
" <td>1672675200000000000</td>\n", |
|
|
|
" <td>2023-01-02 16:00:00</td>\n", |
|
|
|
" <td>1.065975</td>\n", |
|
|
|
" <td>1.066055</td>\n", |
|
|
|
" <td>1.065830</td>\n", |
|
|
|
" <td>1.066055</td>\n", |
|
|
|
" <td>50</td>\n", |
|
|
|
" <th>999999</th>\n", |
|
|
|
" <td>7984752</td>\n", |
|
|
|
" <td>2023-03-03 18:14:30</td>\n", |
|
|
|
" <td>1677867285000000000</td>\n", |
|
|
|
" <td>2023-03-03 18:14:45</td>\n", |
|
|
|
" <td>1.062535</td>\n", |
|
|
|
" <td>1.062520</td>\n", |
|
|
|
" <td>1.062520</td>\n", |
|
|
|
" <td>1.062580</td>\n", |
|
|
|
" <td>59</td>\n", |
|
|
|
" </tr>\n", |
|
|
|
" </tbody>\n", |
|
|
|
"</table>\n", |
|
|
|
"</div>" |
|
|
|
], |
|
|
|
"text/plain": [ |
|
|
|
" id from at to \\\n", |
|
|
|
"0 7730801 2023-01-02 15:58:45 1672675140000000000 2023-01-02 15:59:00 \n", |
|
|
|
"1 7730802 2023-01-02 15:59:00 1672675155000000000 2023-01-02 15:59:15 \n", |
|
|
|
"2 7730803 2023-01-02 15:59:15 1672675170000000000 2023-01-02 15:59:30 \n", |
|
|
|
"3 7730804 2023-01-02 15:59:30 1672675185000000000 2023-01-02 15:59:45 \n", |
|
|
|
"4 7730805 2023-01-02 15:59:45 1672675200000000000 2023-01-02 16:00:00 \n", |
|
|
|
" id from at \\\n", |
|
|
|
"999995 7984748 2023-03-03 18:13:30 1677867225000000000 \n", |
|
|
|
"999996 7984749 2023-03-03 18:13:45 1677867240000000000 \n", |
|
|
|
"999997 7984750 2023-03-03 18:14:00 1677867255000000000 \n", |
|
|
|
"999998 7984751 2023-03-03 18:14:15 1677867270000000000 \n", |
|
|
|
"999999 7984752 2023-03-03 18:14:30 1677867285000000000 \n", |
|
|
|
"\n", |
|
|
|
" open close min max volume \n", |
|
|
|
"0 1.065995 1.066035 1.065930 1.066070 57 \n", |
|
|
|
"1 1.066055 1.066085 1.066005 1.066115 52 \n", |
|
|
|
"2 1.066080 1.066025 1.066025 1.066110 57 \n", |
|
|
|
"3 1.065980 1.065985 1.065885 1.066045 64 \n", |
|
|
|
"4 1.065975 1.066055 1.065830 1.066055 50 " |
|
|
|
" to open close min max volume \n", |
|
|
|
"999995 2023-03-03 18:13:45 1.062695 1.062635 1.062630 1.062700 64 \n", |
|
|
|
"999996 2023-03-03 18:14:00 1.062645 1.062650 1.062625 1.062650 43 \n", |
|
|
|
"999997 2023-03-03 18:14:15 1.062640 1.062625 1.062620 1.062665 47 \n", |
|
|
|
"999998 2023-03-03 18:14:30 1.062625 1.062535 1.062535 1.062645 43 \n", |
|
|
|
"999999 2023-03-03 18:14:45 1.062535 1.062520 1.062520 1.062580 59 " |
|
|
|
] |
|
|
|
}, |
|
|
|
"execution_count": 4, |
|
|
|
"execution_count": 5, |
|
|
|
"metadata": {}, |
|
|
|
"output_type": "execute_result" |
|
|
|
} |
|
|
|
@ -264,7 +261,7 @@ |
|
|
|
"# %%time\n", |
|
|
|
"# Load Dataset\n", |
|
|
|
"df = pd.read_csv(\"out.csv\", index_col=0)\n", |
|
|
|
"df.head()" |
|
|
|
"df.tail()" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
@ -294,7 +291,7 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": null, |
|
|
|
"execution_count": 6, |
|
|
|
"id": "27de1ec8-4de1-440a-b555-b4a46c5ef7ce", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
|
@ -308,6 +305,7 @@ |
|
|
|
"cell_type": "markdown", |
|
|
|
"id": "4a8d5703-9bc9-4d38-83ff-457159304d58", |
|
|
|
"metadata": { |
|
|
|
"jp-MarkdownHeadingCollapsed": true, |
|
|
|
"tags": [] |
|
|
|
}, |
|
|
|
"source": [ |
|
|
|
@ -316,7 +314,7 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 22, |
|
|
|
"execution_count": 9, |
|
|
|
"id": "c3202bbb-2655-45b2-b166-9f45a3ef854c", |
|
|
|
"metadata": { |
|
|
|
"tags": [] |
|
|
|
@ -328,7 +326,7 @@ |
|
|
|
"'Database created'" |
|
|
|
] |
|
|
|
}, |
|
|
|
"execution_count": 22, |
|
|
|
"execution_count": 9, |
|
|
|
"metadata": {}, |
|
|
|
"output_type": "execute_result" |
|
|
|
} |
|
|
|
@ -697,6 +695,7 @@ |
|
|
|
"cell_type": "markdown", |
|
|
|
"id": "b9ddfdc6-c899-4f6c-9b4e-8ec6ab6d7e05", |
|
|
|
"metadata": { |
|
|
|
"jp-MarkdownHeadingCollapsed": true, |
|
|
|
"tags": [] |
|
|
|
}, |
|
|
|
"source": [ |
|
|
|
@ -735,14 +734,6 @@ |
|
|
|
"# testar função" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": null, |
|
|
|
"id": "e173a45b-60a1-4c33-946e-ccf98bf8e97f", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
|
"source": [] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 18, |
|
|
|
@ -811,65 +802,6 @@ |
|
|
|
"psql_read_execution_time = stop - start" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 10, |
|
|
|
"id": "a7883c4d-4609-4380-8a45-246b7ca2f9c5", |
|
|
|
"metadata": { |
|
|
|
"tags": [] |
|
|
|
}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"ename": "NameError", |
|
|
|
"evalue": "name 'engine' is not defined", |
|
|
|
"output_type": "error", |
|
|
|
"traceback": [ |
|
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
|
|
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", |
|
|
|
"File \u001b[0;32m<timed exec>:2\u001b[0m\n", |
|
|
|
"\u001b[0;31mNameError\u001b[0m: name 'engine' is not defined" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"# %%time\n", |
|
|
|
"# # Write\n", |
|
|
|
"# conn = engine.raw_connection()\n", |
|
|
|
"# cur = conn.cursor()\n", |
|
|
|
"# output = io.StringIO()\n", |
|
|
|
"# df.to_csv(output, sep=\"\\t\", header=False, index=False)\n", |
|
|
|
"# output.seek(0)\n", |
|
|
|
"# contents = output.getvalue()\n", |
|
|
|
"\n", |
|
|
|
"# cur.copy_from(output, \"comparedbs\") # , null=\"\") # null values become ''\n", |
|
|
|
"# conn.commit()\n", |
|
|
|
"# cur.close()\n", |
|
|
|
"# conn.close()" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": null, |
|
|
|
"id": "73de4294-1284-49b0-b31e-45db6e835877", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
|
"source": [] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": null, |
|
|
|
"id": "e37a93e1-fc0e-4d27-9e16-dca6c8aea324", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"start = time.time()\n", |
|
|
|
"# %%time\n", |
|
|
|
"# Read\n", |
|
|
|
"df = pd.read_sql_query('select * from \"comparedbs\"', con=engine)\n", |
|
|
|
"end = time.time()\n", |
|
|
|
"postgresql_read_time = exec_time(start, end)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": null, |
|
|
|
@ -891,7 +823,7 @@ |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"df.head()" |
|
|
|
"# df.head()" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
@ -906,8 +838,8 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": null, |
|
|
|
"id": "60a990e2-4607-4654-84ec-17d4985adae2", |
|
|
|
"execution_count": 10, |
|
|
|
"id": "7c7022bf-9c3b-400a-9045-b089483f05ad", |
|
|
|
"metadata": { |
|
|
|
"tags": [] |
|
|
|
}, |
|
|
|
@ -915,32 +847,63 @@ |
|
|
|
"source": [ |
|
|
|
"# fazer sem funçao para ver se melhora\n", |
|
|
|
"# verifique se esta no ssd os arquivos da pasta git\n", |
|
|
|
"def main():\n", |
|
|
|
"def s3Connect():\n", |
|
|
|
" client = Minio(\n", |
|
|
|
" S3MinioUrl,\n", |
|
|
|
" secure=False,\n", |
|
|
|
" region=S3MinioRegion,\n", |
|
|
|
" access_key=\"MatMPA7NyHltz7DQ\",\n", |
|
|
|
" secret_key=\"SO1IG5iBPSjNPZanYUaHCLcoSbjphLCP\",\n", |
|
|
|
" access_key=S3MinioUser,\n", |
|
|
|
" secret_key=S3MinioKey,\n", |
|
|
|
" )\n", |
|
|
|
" return client\n", |
|
|
|
"\n", |
|
|
|
" # Make bucket if not exist.\n", |
|
|
|
" found = client.bucket_exists(\"data\")\n", |
|
|
|
"\n", |
|
|
|
"def s3CreateBucket(bucketName=\"data\"):\n", |
|
|
|
" client = s3Connect()\n", |
|
|
|
" found = client.bucket_exists(bucketName)\n", |
|
|
|
" if not found:\n", |
|
|
|
" client.make_bucket(\"data\")\n", |
|
|
|
" return client.make_bucket(bucketName)\n", |
|
|
|
" else:\n", |
|
|
|
" print(\"Bucket 'data' already exists\")\n", |
|
|
|
" return \"Bucket '{}' already exists\".format(bucketName)\n", |
|
|
|
"\n", |
|
|
|
"\n", |
|
|
|
" # Upload\n", |
|
|
|
"def s3uploadCsv():\n", |
|
|
|
" client = s3Connect()\n", |
|
|
|
" client.fput_object(\n", |
|
|
|
" \"data\",\n", |
|
|
|
" \"data.parquet\",\n", |
|
|
|
" \"data/data.parquet\",\n", |
|
|
|
" )\n", |
|
|
|
" # print(\n", |
|
|
|
" # \"'data/data.parquet' is successfully uploaded as \"\n", |
|
|
|
" # \"object 'data.parquet' to bucket 'data'.\"\n", |
|
|
|
" # )" |
|
|
|
" return (\n", |
|
|
|
" \"'data/data.parquet' is successfully uploaded as \"\n", |
|
|
|
" \"object 'data.parquet' to bucket 'data'.\"\n", |
|
|
|
" )" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 11, |
|
|
|
"id": "cd7fe012-9eee-4f91-8c07-8e0148633766", |
|
|
|
"metadata": { |
|
|
|
"tags": [] |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"def main():\n", |
|
|
|
" # Insert to db and benchmark time\n", |
|
|
|
" df.to_parquet(\"data/data.parquet\")\n", |
|
|
|
" s3CreateBucket()\n", |
|
|
|
" start = timeit.default_timer()\n", |
|
|
|
" s3uploadCsv()\n", |
|
|
|
" stop = timeit.default_timer()\n", |
|
|
|
" s3_write_execution_time = stop - start\n", |
|
|
|
"\n", |
|
|
|
"\n", |
|
|
|
"if __name__ == \"__main__\":\n", |
|
|
|
" try:\n", |
|
|
|
" main()\n", |
|
|
|
" except S3Error as exc:\n", |
|
|
|
" print(\"error occurred.\", exc)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
@ -1136,8 +1099,6 @@ |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"import numpy as np\n", |
|
|
|
"\n", |
|
|
|
"np.bool = np.bool_\n", |
|
|
|
"from qpython import qconnection" |
|
|
|
] |
|
|
|
|