Browse Source

psql functions

master
flashlan 3 years ago
parent
commit
4f2350bd15
  1. 237
      compareDBs.ipynb

237
compareDBs.ipynb

@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": 11,
"id": "ab6c6c81-6ac1-4668-a79b-a9a0341fb35a",
"metadata": {
"tags": []
@ -59,7 +59,7 @@
"False"
]
},
"execution_count": 65,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@ -89,7 +89,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"id": "55c3cd57-0996-4723-beb5-8f3196c96009",
"metadata": {
"tags": []
@ -102,7 +102,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 13,
"id": "968403e3-2e5e-4834-b969-be4600e2963a",
"metadata": {
"tags": []
@ -137,10 +137,128 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "3634a4ec-04c2-4f1e-8659-5d22eb17a254",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>from</th>\n",
" <th>at</th>\n",
" <th>to</th>\n",
" <th>open</th>\n",
" <th>close</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>volume</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7730801</td>\n",
" <td>2023-01-02 15:58:45</td>\n",
" <td>1672675140000000000</td>\n",
" <td>2023-01-02 15:59:00</td>\n",
" <td>1.065995</td>\n",
" <td>1.066035</td>\n",
" <td>1.065930</td>\n",
" <td>1.066070</td>\n",
" <td>57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7730802</td>\n",
" <td>2023-01-02 15:59:00</td>\n",
" <td>1672675155000000000</td>\n",
" <td>2023-01-02 15:59:15</td>\n",
" <td>1.066055</td>\n",
" <td>1.066085</td>\n",
" <td>1.066005</td>\n",
" <td>1.066115</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7730803</td>\n",
" <td>2023-01-02 15:59:15</td>\n",
" <td>1672675170000000000</td>\n",
" <td>2023-01-02 15:59:30</td>\n",
" <td>1.066080</td>\n",
" <td>1.066025</td>\n",
" <td>1.066025</td>\n",
" <td>1.066110</td>\n",
" <td>57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7730804</td>\n",
" <td>2023-01-02 15:59:30</td>\n",
" <td>1672675185000000000</td>\n",
" <td>2023-01-02 15:59:45</td>\n",
" <td>1.065980</td>\n",
" <td>1.065985</td>\n",
" <td>1.065885</td>\n",
" <td>1.066045</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7730805</td>\n",
" <td>2023-01-02 15:59:45</td>\n",
" <td>1672675200000000000</td>\n",
" <td>2023-01-02 16:00:00</td>\n",
" <td>1.065975</td>\n",
" <td>1.066055</td>\n",
" <td>1.065830</td>\n",
" <td>1.066055</td>\n",
" <td>50</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id from at to \\\n",
"0 7730801 2023-01-02 15:58:45 1672675140000000000 2023-01-02 15:59:00 \n",
"1 7730802 2023-01-02 15:59:00 1672675155000000000 2023-01-02 15:59:15 \n",
"2 7730803 2023-01-02 15:59:15 1672675170000000000 2023-01-02 15:59:30 \n",
"3 7730804 2023-01-02 15:59:30 1672675185000000000 2023-01-02 15:59:45 \n",
"4 7730805 2023-01-02 15:59:45 1672675200000000000 2023-01-02 16:00:00 \n",
"\n",
" open close min max volume \n",
"0 1.065995 1.066035 1.065930 1.066070 57 \n",
"1 1.066055 1.066085 1.066005 1.066115 52 \n",
"2 1.066080 1.066025 1.066025 1.066110 57 \n",
"3 1.065980 1.065985 1.065885 1.066045 64 \n",
"4 1.065975 1.066055 1.065830 1.066055 50 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# %%time\n",
"# Load Dataset\n",
@ -197,26 +315,50 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 15,
"id": "c3202bbb-2655-45b2-b166-9f45a3ef854c",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'Database created'"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
"name": "stderr",
"output_type": "stream",
"text": [
"Failed to connect to 192.168.1.142:9000\n",
"Traceback (most recent call last):\n",
" File \"/home/sandman/dev/pipenv/lib/python3.10/site-packages/clickhouse_driver/connection.py\", line 395, in connect\n",
" return self._init_connection(host, port)\n",
" File \"/home/sandman/dev/pipenv/lib/python3.10/site-packages/clickhouse_driver/connection.py\", line 325, in _init_connection\n",
" self.socket = self._create_socket(host, port)\n",
" File \"/home/sandman/dev/pipenv/lib/python3.10/site-packages/clickhouse_driver/connection.py\", line 297, in _create_socket\n",
" raise err\n",
" File \"/home/sandman/dev/pipenv/lib/python3.10/site-packages/clickhouse_driver/connection.py\", line 288, in _create_socket\n",
" sock.connect(sa)\n",
"TimeoutError: timed out\n"
]
},
{
"ename": "SocketTimeoutError",
"evalue": "Code: 209. (192.168.1.142:9000)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mSocketTimeoutError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[15], line 44\u001b[0m\n\u001b[1;32m 40\u001b[0m client\u001b[38;5;241m.\u001b[39mdisconnect()\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dfQuery\n\u001b[0;32m---> 44\u001b[0m \u001b[43mcHouseCreateDb\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdbname\u001b[49m\u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[15], line 17\u001b[0m, in \u001b[0;36mcHouseCreateDb\u001b[0;34m(databasename)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcHouseCreateDb\u001b[39m(databasename):\n\u001b[1;32m 16\u001b[0m client \u001b[38;5;241m=\u001b[39m cHouseConnect()\n\u001b[0;32m---> 17\u001b[0m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCREATE TABLE IF NOT EXISTS \u001b[39;49m\u001b[38;5;132;43;01m{}\u001b[39;49;00m\u001b[38;5;124;43m (id UInt32,\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfrom DateTime, at UInt64, to DateTime, open Float64,\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mclose Float64, min Float64, max Float64, volume UInt32)\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mENGINE MergeTree ORDER BY to\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mformat\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdatabasename\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 23\u001b[0m client\u001b[38;5;241m.\u001b[39mdisconnect()\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDatabase created\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
"File \u001b[0;32m~/dev/pipenv/lib/python3.10/site-packages/clickhouse_driver/client.py:361\u001b[0m, in \u001b[0;36mClient.execute\u001b[0;34m(self, query, params, with_column_types, external_tables, query_id, settings, types_check, columnar)\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 319\u001b[0m \u001b[38;5;124;03mExecutes query.\u001b[39;00m\n\u001b[1;32m 320\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 356\u001b[0m \u001b[38;5;124;03m and types.\u001b[39;00m\n\u001b[1;32m 357\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 359\u001b[0m start_time \u001b[38;5;241m=\u001b[39m time()\n\u001b[0;32m--> 361\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdisconnect_on_error(query, settings):\n\u001b[1;32m 362\u001b[0m \u001b[38;5;66;03m# INSERT queries can use list/tuple/generator of list/tuples/dicts.\u001b[39;00m\n\u001b[1;32m 363\u001b[0m \u001b[38;5;66;03m# For SELECT parameters can be passed in only in dict right now.\u001b[39;00m\n\u001b[1;32m 364\u001b[0m is_insert \u001b[38;5;241m=\u001b[39m \u001b[38;5;28misinstance\u001b[39m(params, (\u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m, types\u001b[38;5;241m.\u001b[39mGeneratorType))\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_insert:\n",
"File \u001b[0;32m~/dev/anaconda3/lib/python3.10/contextlib.py:135\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__enter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkwds, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfunc\n\u001b[1;32m 134\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgen\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgenerator didn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt yield\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
"File \u001b[0;32m~/dev/pipenv/lib/python3.10/site-packages/clickhouse_driver/client.py:305\u001b[0m, in \u001b[0;36mClient.disconnect_on_error\u001b[0;34m(self, query, settings)\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[38;5;129m@contextmanager\u001b[39m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdisconnect_on_error\u001b[39m(\u001b[38;5;28mself\u001b[39m, query, settings):\n\u001b[1;32m 304\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 305\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mestablish_connection\u001b[49m\u001b[43m(\u001b[49m\u001b[43msettings\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrack_current_database(query)\n",
"File \u001b[0;32m~/dev/pipenv/lib/python3.10/site-packages/clickhouse_driver/client.py:292\u001b[0m, in \u001b[0;36mClient.establish_connection\u001b[0;34m(self, settings)\u001b[0m\n\u001b[1;32m 290\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconnection \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_connection()\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmake_query_settings(settings)\n\u001b[0;32m--> 292\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforce_connect\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlast_query \u001b[38;5;241m=\u001b[39m QueryInfo()\n\u001b[1;32m 295\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (errors\u001b[38;5;241m.\u001b[39mSocketTimeoutError, errors\u001b[38;5;241m.\u001b[39mNetworkError):\n",
"File \u001b[0;32m~/dev/pipenv/lib/python3.10/site-packages/clickhouse_driver/connection.py:254\u001b[0m, in \u001b[0;36mConnection.force_connect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcheck_query_execution()\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconnected:\n\u001b[0;32m--> 254\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconnect\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mping():\n\u001b[1;32m 257\u001b[0m logger\u001b[38;5;241m.\u001b[39mwarning(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mConnection was closed, reconnecting.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"File \u001b[0;32m~/dev/pipenv/lib/python3.10/site-packages/clickhouse_driver/connection.py:416\u001b[0m, in \u001b[0;36mConnection.connect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhosts\u001b[38;5;241m.\u001b[39mrotate(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 415\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m err \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 416\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n",
"\u001b[0;31mSocketTimeoutError\u001b[0m: Code: 209. (192.168.1.142:9000)"
]
}
],
"source": [
"# !! O client oficial usa um driver http, nesse exemplo vamos usar a biblioteca\n",
"# de terceirtos clickhouse_driver recomendada, por sua vez que usa tcp.\n",
"# !! driver tcp.\n",
"def cHouseConnect():\n",
" client = Client(\n",
" host=ClickHouseUrl,\n",
@ -587,38 +729,81 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 23,
"id": "16cd8eb7-333d-43fd-88e0-ee983645d3fd",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax. Perhaps you forgot a comma? (1520588424.py, line 4)",
"output_type": "error",
"traceback": [
"\u001b[0;36m Cell \u001b[0;32mIn[23], line 4\u001b[0;36m\u001b[0m\n\u001b[0;31m \"postgresql+psycopg2://{}:{}@{}:5432/{}\".format(\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax. Perhaps you forgot a comma?\n"
]
}
],
"source": [
"# Connect / Create Tables\n",
"engine = create_engine(\n",
" \"postgresql+psycopg2://{}:{}@{}:5432/{}\".format(\n",
" PostgresqlUser, PostgresqlKey, PostgresqlUrl, PostgresqlDB\n",
"def psqlConnect():\n",
" engine = create_engine(\n",
" \"postgresql+psycopg2://{}:{}@{}:5432/{}\".format(\n",
" PostgresqlUser, PostgresqlKey, PostgresqlUrl, PostgresqlDB\n",
" )\n",
" )\n",
")"
" return engine\n",
"\n",
"\n",
"psqlConnect()\n",
"# testar função"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e173a45b-60a1-4c33-946e-ccf98bf8e97f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 26,
"id": "be31f3a0-b7ed-48e6-9b65-dc16319fb8d1",
"metadata": {},
"outputs": [],
"source": [
"# Drop old table and create new empty table\n",
"df.head(0).to_sql(\"comparedbs\", engine, if_exists=\"replace\", index=False)"
"def psqlCreateTables(databaseName):\n",
" engine = psqlConnect()\n",
" df.head(0).to_sql(databaseName, engine, if_exists=\"replace\", index=False)\n",
" # disconnect()\n",
" return 0\n",
"\n",
"\n",
"psqlCreateTables(dbname)\n",
"# testar função"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 27,
"id": "a7883c4d-4609-4380-8a45-246b7ca2f9c5",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"ename": "AttributeError",
"evalue": "'int' object has no attribute 'raw_connection'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m<timed exec>:2\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'int' object has no attribute 'raw_connection'"
]
}
],
"source": [
"%%time\n",
"# Write\n",

Loading…
Cancel
Save