|
|
@ -196,7 +196,6 @@ |
|
|
"cell_type": "markdown", |
|
|
"cell_type": "markdown", |
|
|
"id": "274cc026-2f48-4e38-b80f-b1a9ff982060", |
|
|
"id": "274cc026-2f48-4e38-b80f-b1a9ff982060", |
|
|
"metadata": { |
|
|
"metadata": { |
|
|
"jp-MarkdownHeadingCollapsed": true, |
|
|
|
|
|
"tags": [] |
|
|
"tags": [] |
|
|
}, |
|
|
}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
@ -221,7 +220,6 @@ |
|
|
"cell_type": "markdown", |
|
|
"cell_type": "markdown", |
|
|
"id": "4a8d5703-9bc9-4d38-83ff-457159304d58", |
|
|
"id": "4a8d5703-9bc9-4d38-83ff-457159304d58", |
|
|
"metadata": { |
|
|
"metadata": { |
|
|
"jp-MarkdownHeadingCollapsed": true, |
|
|
|
|
|
"tags": [] |
|
|
"tags": [] |
|
|
}, |
|
|
}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
@ -470,33 +468,12 @@ |
|
|
"cell_type": "markdown", |
|
|
"cell_type": "markdown", |
|
|
"id": "f9e0393d-7d1d-406a-a068-9dbf4968e977", |
|
|
"id": "f9e0393d-7d1d-406a-a068-9dbf4968e977", |
|
|
"metadata": { |
|
|
"metadata": { |
|
|
"jp-MarkdownHeadingCollapsed": true, |
|
|
|
|
|
"tags": [] |
|
|
"tags": [] |
|
|
}, |
|
|
}, |
|
|
"source": [ |
|
|
"source": [ |
|
|
"### S3 Parquet" |
|
|
"### S3 Parquet" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"id": "98cf93c9-cb63-436c-809b-ef3ff4c3d8a5", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# fs = s3fs.S3FileSystem(\n", |
|
|
|
|
|
"# anon=False,\n", |
|
|
|
|
|
"# use_ssl=False,\n", |
|
|
|
|
|
"# client_kwargs={\n", |
|
|
|
|
|
"# \"region_name\": S3MinioRegion,\n", |
|
|
|
|
|
"# \"endpoint_url\": S3MinioUrl,\n", |
|
|
|
|
|
"# \"aws_access_key_id\": \"MatMPA7NyHltz7DQ\",\n", |
|
|
|
|
|
"# \"aws_secret_access_key\": \"SO1IG5iBPSjNPZanYUaHCLcoSbjphLCP\",\n", |
|
|
|
|
|
"# \"verify\": False,\n", |
|
|
|
|
|
"# },\n", |
|
|
|
|
|
"# )" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
{ |
|
|
"cell_type": "code", |
|
|
"cell_type": "code", |
|
|
"execution_count": 72, |
|
|
"execution_count": 72, |
|
|
@ -509,8 +486,6 @@ |
|
|
"# fazer sem funçao para ver se melhora\n", |
|
|
"# fazer sem funçao para ver se melhora\n", |
|
|
"# verifique se esta no ssd os arquivos da pasta git\n", |
|
|
"# verifique se esta no ssd os arquivos da pasta git\n", |
|
|
"def main():\n", |
|
|
"def main():\n", |
|
|
" # Create a client with the MinIO server playground, its access key\n", |
|
|
|
|
|
" # and secret key.\n", |
|
|
|
|
|
" client = Minio(\n", |
|
|
" client = Minio(\n", |
|
|
" S3MinioUrl,\n", |
|
|
" S3MinioUrl,\n", |
|
|
" secure=False,\n", |
|
|
" secure=False,\n", |
|
|
@ -519,15 +494,14 @@ |
|
|
" secret_key=\"SO1IG5iBPSjNPZanYUaHCLcoSbjphLCP\",\n", |
|
|
" secret_key=\"SO1IG5iBPSjNPZanYUaHCLcoSbjphLCP\",\n", |
|
|
" )\n", |
|
|
" )\n", |
|
|
"\n", |
|
|
"\n", |
|
|
" # Make 'asiatrip' bucket if not exist.\n", |
|
|
" # Make bucket if not exist.\n", |
|
|
" found = client.bucket_exists(\"data\")\n", |
|
|
" found = client.bucket_exists(\"data\")\n", |
|
|
" if not found:\n", |
|
|
" if not found:\n", |
|
|
" client.make_bucket(\"data\")\n", |
|
|
" client.make_bucket(\"data\")\n", |
|
|
" else:\n", |
|
|
" else:\n", |
|
|
" print(\"Bucket 'data' already exists\")\n", |
|
|
" print(\"Bucket 'data' already exists\")\n", |
|
|
"\n", |
|
|
"\n", |
|
|
" # Upload '/home/user/Photos/asiaphotos.zip' as object name\n", |
|
|
" # Upload\n", |
|
|
" # 'asiaphotos-2015.zip' to bucket 'asiatrip'.\n", |
|
|
|
|
|
" client.fput_object(\n", |
|
|
" client.fput_object(\n", |
|
|
" \"data\",\n", |
|
|
" \"data\",\n", |
|
|
" \"data.parquet\",\n", |
|
|
" \"data.parquet\",\n", |
|
|
@ -720,102 +694,6 @@ |
|
|
"pq.head()" |
|
|
"pq.head()" |
|
|
] |
|
|
] |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"id": "62b5ecf6-1178-4824-9c97-91522abcde93", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"tags": [] |
|
|
|
|
|
}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# # from friendly.jupyter import Friendly\n", |
|
|
|
|
|
"# path_to_s3_object = \"http://192.168.1.125:9000/obsidian/sample.parquet\"\n", |
|
|
|
|
|
"# # df = to_df(data)\n", |
|
|
|
|
|
"# df.to_parquet(\"data/data.parquet\")" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"id": "55dd51a3-7387-467c-95f0-6c282c4135eb", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"tags": [] |
|
|
|
|
|
}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# with fs.open(\"obsidian/data.parquet\", \"wb\") as f:\n", |
|
|
|
|
|
"# df.to_parquet(f)" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"id": "0c351614-8373-4822-a423-20412c92a6eb", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# s3_filepath = \"obsidian/data.parquet\"\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"# pq.write_to_dataset(\n", |
|
|
|
|
|
"# Table.from_pandas(df),\n", |
|
|
|
|
|
"# s3_filepath,\n", |
|
|
|
|
|
"# filesystem=fs,\n", |
|
|
|
|
|
"# use_dictionary=True,\n", |
|
|
|
|
|
"# compression=\"snappy\",\n", |
|
|
|
|
|
"# version=\"2.4\",\n", |
|
|
|
|
|
"# )" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"id": "480cc01f-3239-4f7d-b20a-70c17d59d6f6", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"tags": [] |
|
|
|
|
|
}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# pq.write_to_dataset(\n", |
|
|
|
|
|
"# Table.from_pandas(df),\n", |
|
|
|
|
|
"# path_to_s3_object,\n", |
|
|
|
|
|
"# filesystem=fs,\n", |
|
|
|
|
|
"# use_dictionary=True,\n", |
|
|
|
|
|
"# compression=\"snappy\",\n", |
|
|
|
|
|
"# version=\"2.0\",\n", |
|
|
|
|
|
"# )" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"id": "c33f70ac-cfcf-4024-af86-c08bcc60d9a5", |
|
|
|
|
|
"metadata": {}, |
|
|
|
|
|
"outputs": [], |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# path_to_s3_object = \"s3://sample-bucket/path/to/sample.parquet\"\n", |
|
|
|
|
|
"\n", |
|
|
|
|
|
"# data = [\n", |
|
|
|
|
|
"# {\n", |
|
|
|
|
|
"# \"hoge\": 1,\n", |
|
|
|
|
|
"# \"foo\": \"blah\",\n", |
|
|
|
|
|
"# },\n", |
|
|
|
|
|
"# {\n", |
|
|
|
|
|
"# \"boo\": \"test\",\n", |
|
|
|
|
|
"# \"bar\": 123,\n", |
|
|
|
|
|
"# },\n", |
|
|
|
|
|
"# ]\n", |
|
|
|
|
|
"# df = to_df(data)\n", |
|
|
|
|
|
"# pq.write_to_dataset(\n", |
|
|
|
|
|
"# Table.from_pandas(df),\n", |
|
|
|
|
|
"# path_to_s3_object,\n", |
|
|
|
|
|
"# filesystem=fs,\n", |
|
|
|
|
|
"# use_dictionary=True,\n", |
|
|
|
|
|
"# compression=\"snappy\",\n", |
|
|
|
|
|
"# version=\"2.0\",\n", |
|
|
|
|
|
"# )" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
{ |
|
|
"cell_type": "markdown", |
|
|
"cell_type": "markdown", |
|
|
"id": "50d1fc58-89a7-4507-aff0-6e943656cfe0", |
|
|
"id": "50d1fc58-89a7-4507-aff0-6e943656cfe0", |
|
|
|