{"id":102,"date":"2019-01-05T23:21:03","date_gmt":"2019-01-05T15:21:03","guid":{"rendered":"http:\/\/www.threebrush.com\/wordpress\/?p=102"},"modified":"2022-01-24T14:05:55","modified_gmt":"2022-01-24T06:05:55","slug":"institutionalshareholdingreptilefromeastmoney","status":"publish","type":"post","link":"http:\/\/www.threebrush.com\/wordpress\/index.php\/2019\/01\/05\/institutionalshareholdingreptilefromeastmoney\/","title":{"rendered":"\u4ece\u4e1c\u65b9\u8d22\u5bcc\u6293\u53d6\u673a\u6784\u6301\u80a1\u6570\u636e"},"content":{"rendered":"<p>\u5728\u7ecf\u5178\u7684\u52a8\u91cf\u9009\u80a1\u7b56\u7565\u4e2d\uff0c\u6709\u4e00\u4e2a\u6bd4\u8f83\u7ecf\u5178\u7684\u7279\u5f81\uff0c\u5c31\u662f\u673a\u6784\u6301\u80a1\u6bd4\u4f8b\uff0c\u4e00\u822c\u6765\u8bf4\u673a\u6784\u6301\u80a1\u6bd4\u4f8b\u9ad8\u7684\u80a1\u7968\u8ddf\u57fa\u672c\u9762\u56e0\u5b50\u7684\u76f8\u5173\u5ea6\u66f4\u9ad8<\/p>\n<p>\u5728\u805a\u5bbd\u548c\u4f18\u77ff\u7684\u6570\u636e\u4e2d\u90fd\u6ca1\u6709\u673a\u6784\u6301\u80a1\u6bd4\u4f8b\u8fd9\u4e2a\u6570\u636e\uff0c\u4f46\u662f\u5728\u4e1c\u65b9\u8d22\u5bcc\u7f51\u7ad9\u4e0a\u6709\u8fd9\u4e2a\u6570\u636e\uff0c\u4f8b\u5982\u5408\u5174\u5305\u88c5\u7684\u673a\u6784\u6301\u80a1\u7edf\u8ba1\u5728\u8fd9\u4e2a\u7f51\u9875\u53ef\u4ee5\u770b\u5230<\/p>\n<p>http:\/\/data.eastmoney.com\/zlsj\/detail\/2018-09-30-0-002228.html<\/p>\n<p><img decoding=\"async\" loading=\"lazy\" class=\"alignnone size-medium wp-image-103\" src=\"http:\/\/www.threebrush.com\/wordpress\/wp-content\/uploads\/2019\/01\/\u673a\u6784\u6301\u80a1-300x132.jpg\" alt=\"\" width=\"300\" height=\"132\" srcset=\"http:\/\/www.threebrush.com\/wordpress\/wp-content\/uploads\/2019\/01\/\u673a\u6784\u6301\u80a1-300x132.jpg 300w, http:\/\/www.threebrush.com\/wordpress\/wp-content\/uploads\/2019\/01\/\u673a\u6784\u6301\u80a1-768x339.jpg 768w, http:\/\/www.threebrush.com\/wordpress\/wp-content\/uploads\/2019\/01\/\u673a\u6784\u6301\u80a1.jpg 877w\" sizes=\"(max-width: 300px) 100vw, 300px\" \/><\/p>\n<p>\u4e3a\u4e86\u80fd\u591f\u5728\u91cf\u5316\u7b56\u7565\u4e2d\u4f7f\u7528\u8fd9\u4e2a\u56e0\u5b50\uff0c\u51b3\u5b9a\u624b\u52a8\u5199\u4ee3\u7801\u4ece\u7f51\u7ad9\u6293\u53d6\u8fd9\u4e2a\u6570\u636e\uff0c<\/p>\n<p>python\u6293\u8fd9\u4e2a\u6570\u636e\u4f7f\u7528requests_html\u7684HTMLSession\u6216\u8005\u7528urllib+&nbsp;bs4\u90fd\u53ef\u4ee5\u5b9e\u73b0<\/p>\n<p>\u4f7f\u7528urllib+&nbsp;bs4\u7684\u4ee3\u7801\u5982\u4e0b<\/p>\n<pre>[cc lang=\"python\"]\nimport urllib\nimport bs4\n\n#\u4e0b\u8f7d\u9875\u9762\n\ndef get_html(url):\n    html = urllib.request.urlopen(url).read()\n    html = html.decode('gbk')\n\nreturn html\n\n#\u89e3\u6790\u9875\u9762\u6293\u53d6\u9700\u8981\u7684\u6570\u636e\n\ndef get_page(url):\n    html = get_html(url)\n    soup = bs4.BeautifulSoup(html, \"html.parser\")\n    table = soup.find('table', class_='tab1')\n    holdElement = table.select('td')\n    if (holdElement == None):\n        print(\"hold empty\");\n\n#\u57fa\u91d1\u6301\u80a1\u5bb6\u6570\ntry:\n    fund_num = int(holdElement[2].text)\nexcept:\n    fund_num = 0\n\n#\u57fa\u91d1\u6301\u80a1\u80a1\u6570\ntry:\n    fund_hold_vol = float(holdElement[3].text)\nexcept:\n    fund_hold_vol = 0.0\n\n#\u57fa\u91d1\u6301\u80a1\u5e02\u503c\ntry:\n    fund_hold_value = float(holdElement[4].text)\nexcept:\n    fund_hold_value = 0.0\n\n#\u57fa\u91d1\u6301\u80a1\u5360\u603b\u5e02\u503c\u6bd4\u4f8b\ntry:\n    fund_ratio = float(holdElement[5].text)\nexcept:\n    fund_ratio = 0.0\n\n#\u57fa\u91d1\u6301\u80a1\u5360\u6d41\u901a\u5e02\u503c\u6bd4\u4f8b\ntry:\n    fund_ratio_in_circu = float(holdElement[6].text)\nexcept:\n    fund_ratio_in_circu = 0.0\n\n#\u673a\u6784\u6c47\u603b\u603b\u5bb6\u6570\ntry:\n    total_num = int(holdElement[38].text)\nexcept:\n    total_num = 0\n\n#\u673a\u6784\u6c47\u603b\u6301\u80a1\u80a1\u6570\ntry:\n    total_hold_vol = float(holdElement[39].text)\nexcept:\n    total_hold_vol = 0.0\n\n#\u673a\u6784\u6c47\u603b\u6301\u80a1\u5e02\u503c\ntry:\n    total_hold_value = float(holdElement[40].text)\nexcept: \n    total_hold_value = 0.0\n\n#\u673a\u6784\u6c47\u603b\u5360\u603b\u80a1\u672c\u6bd4\u4f8b\ntry:\n    total_ratio = float(holdElement[41].text)\nexcept:\n    total_ratio = 0.0\n\n#\u673a\u6784\u6c47\u603b\u5360\u6d41\u901a\u80a1\u672c\u6bd4\u4f8b\n\ntry:\n    total_ratio_in_circu = float(holdElement[42].text)\nexcept:\n    total_ratio_in_circu = 0.0\nreturn (code, end_date, fund_num, fund_hold_vol, fund_hold_value, fund_ratio, fund_ratio_in_circu,\ntotal_num, total_hold_vol,total_hold_value, total_ratio, total_ratio_in_circu)\n[\/cc]<\/pre>\n<p>\u62ff\u5230\u8fd9\u4e9b\u6570\u636e\u540e\uff0c\u518d\u5199\u5165mysql\u6216\u8005csv\uff0c\u5c31\u53ef\u4ee5\u5728\u7b56\u7565\u4e2d\u4f7f\u7528\u4e86<\/p>\n<pre>[cc lang=\"python\"]\nend_dates=('2017-03-31','2017-06-30','2017-09-30','2017-12-31','2018-03-31','2018-06-30')\n#end_date='2018-09-30'\nfor end_date in end_dates:\n    for code in df_stocks['code']:\n        url = \"http:\/\/data.eastmoney.com\/zlsj\/detail\/%s-0-%s.html\"\n        args = get_page(url % (end_date,code[:-5]))\n        #\u67e5\u8be2\u6570\u636e\u5e93\u5224\u65ad\u662f\u5426\u5b58\u5728\u8bb0\u5f55\n        sql_select = \"SELECT * FROM institutional_hold WHERE code = '%s' AND end_date='%s' \"\n        cur = connect.cursor()\n        record_count = cur.execute(sql_select % (code, end_date))\n        \n        if record_count &gt; 0:\n            continue\n\n        column_str = \"\"\"code, end_date, fund_num, fund_hold_vol, fund_hold_value, fund_ratio, \n        fund_ratio_in_circu,total_num, total_hold_vol,total_hold_value, total_ratio, \ntotal_ratio_in_circu\"\"\"\n        insert_str = \"'%s','%s',%d,%.2f,%.2f,%.2f,%.2f,%d,%.2f,%.2f,%.2f,%.2f\"\n        sql_insert = \"INSERT INTO institutional_hold (%s) VALUES (%s)\" % (column_str, insert_str)\n\n        try:\n            cur.execute(sql_insert % args)\n            connect.commit()\n        except:\n            connect.rollback()\n        print(str(code) + ' ' + str(end_date), end= '\\r')\n[\/cc]<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u5728\u7ecf\u5178\u7684\u52a8\u91cf\u9009\u80a1\u7b56\u7565\u4e2d\uff0c\u6709\u4e00\u4e2a\u6bd4\u8f83\u7ecf\u5178\u7684\u7279\u5f81\uff0c\u5c31\u662f\u673a\u6784\u6301\u80a1\u6bd4\u4f8b\uff0c\u4e00\u822c\u6765\u8bf4\u673a\u6784\u6301\u80a1&#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[6],"tags":[],"_links":{"self":[{"href":"http:\/\/www.threebrush.com\/wordpress\/index.php\/wp-json\/wp\/v2\/posts\/102"}],"collection":[{"href":"http:\/\/www.threebrush.com\/wordpress\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.threebrush.com\/wordpress\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.threebrush.com\/wordpress\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.threebrush.com\/wordpress\/index.php\/wp-json\/wp\/v2\/comments?post=102"}],"version-history":[{"count":17,"href":"http:\/\/www.threebrush.com\/wordpress\/index.php\/wp-json\/wp\/v2\/posts\/102\/revisions"}],"predecessor-version":[{"id":152,"href":"http:\/\/www.threebrush.com\/wordpress\/index.php\/wp-json\/wp\/v2\/posts\/102\/revisions\/152"}],"wp:attachment":[{"href":"http:\/\/www.threebrush.com\/wordpress\/index.php\/wp-json\/wp\/v2\/media?parent=102"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.threebrush.com\/wordpress\/index.php\/wp-json\/wp\/v2\/categories?post=102"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.threebrush.com\/wordpress\/index.php\/wp-json\/wp\/v2\/tags?post=102"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}