百度地图小区边界爬取
1、准备需要爬取的小区名称,每行一个,存放在 UTF-8 编码的 txt 文本文件中
# Read residential-community names from a text file
def read_residential_names(file_path):
    """Read residential-community names from a text file, one name per line.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped.

    :param file_path: Path of the text file holding the names.
    :return: List of names; an empty list when ``file_path`` is not an
        existing regular file.
    """
    # isfile (rather than exists) also rejects directories, which open()
    # cannot read.
    if not os.path.isfile(file_path):
        print(f"File not found: {file_path}")
        return []
    # "utf-8-sig" drops a leading byte-order mark (written by some Windows
    # editors); str.strip() does not remove it, so it would otherwise stay
    # glued to the first name and corrupt the first search query.
    with open(file_path, "r", encoding="utf-8-sig") as file:
        stripped_lines = (line.strip() for line in file)
        return [name for name in stripped_lines if name]
2、需要根据住宅区名称和所在地区获取其UID
def get_residential_uid(residential_name, region, bmap_key):
    """Look up the Baidu Maps UID of a residential community.

    Queries the Baidu place-search endpoint and returns the UID of the first
    result whose name does not contain ``-``.

    :param residential_name: Name of the residential community.
    :param region: Region (e.g. city) the search is limited to.
    :param bmap_key: Baidu Maps API key.
    :return: The UID string, or ``None`` when nothing suitable is found or
        the request fails.
    """
    from urllib.parse import urlencode

    # Build the query with urlencode so the "region" parameter is really sent
    # (the original URL had a mangled "&region=") and so names containing
    # "&", "#" or spaces cannot break the query string.
    query = urlencode({
        "query": residential_name,
        "region": region,
        "output": "json",
        "city_limit": "true",
        "ak": bmap_key,
    })
    bmap_localsearch_url = f"http://api.map.baidu.com/place/v2/search?{query}"

    try:
        # The context manager closes the session's pooled connections.
        with requests.Session() as s:
            s.mount('http://', HTTPAdapter(max_retries=3))
            s.mount('https://', HTTPAdapter(max_retries=3))
            response = s.get(bmap_localsearch_url, timeout=5, headers={"Connection": "close"})
            data = response.json()

        if data['status'] == 0 and len(data['results']) > 0:
            for info in data['results']:
                # NOTE(review): names containing "-" are skipped; presumably
                # these are sub-POIs of a community (gates, buildings) -
                # confirm against real responses.
                if '-' not in info['name']:
                    return info['uid']
            print(f"No valid UID found for {residential_name} in {region}")
            return None

        print(f"No results found for {residential_name} in {region}")
        return None
    except Exception as e:
        # Best-effort lookup: network, JSON and schema errors are all
        # reported and turned into None so a batch run can continue.
        print(f"Error in get_residential_uid: {e}\nURL: {bmap_localsearch_url}")
        return None
3、根据UID获取住宅区的边界信息
def get_boundary_by_uid(uid, bmap_key):
    """Fetch the boundary of a residential community by its Baidu Maps UID.

    :param uid: Baidu Maps UID of the target.
    :param bmap_key: Baidu Maps API key.
    :return: Boundary as a ``"x1,y1;x2,y2;..."`` string, or ``None`` when the
        response carries no boundary or the request/parsing fails.
    """
    bmap_boundary_url = f"http://map.baidu.com/?reqflag=pcmap&from=webmap&qt=ext&uid={uid}&ext_ver=new&l=18&ak={bmap_key}"

    try:
        # The context manager closes the session's pooled connections.
        with requests.Session() as s:
            s.mount('http://', HTTPAdapter(max_retries=3))
            s.mount('https://', HTTPAdapter(max_retries=3))
            response = s.get(bmap_boundary_url, timeout=5, headers={"Connection": "close"})
            data = response.json()

        if 'content' in data and 'geo' in data['content']:
            geo = data['content']['geo']
            # The third "|"-separated field looks like "<prefix>-x,y,x,y,...;".
            # Split on the first "-" only, so negative coordinate values do
            # not cut the list short.
            raw_values = geo.split('|')[2].split('-', 1)[1].split(',')
            values = [value.strip(';') for value in raw_values]
            # zip() pairs x with y and drops a trailing unpaired value instead
            # of raising IndexError, matching parse_geo_data's pairing rule.
            boundary = ';'.join(
                f"{x},{y}" for x, y in zip(values[0::2], values[1::2])
            )
            return boundary

        print(f"No boundary information found for UID: {uid}")
        return None
    except Exception as e:
        # Best-effort lookup: network, JSON and format errors are all
        # reported and turned into None so a batch run can continue.
        print(f"Error in get_boundary_by_uid: {e}\nURL: {bmap_boundary_url}")
        return None
4、解析百度地图返回的geo数据,提取坐标点
def parse_geo_data(geo_data):
    """Parse a Baidu Maps ``geo`` string into a list of coordinate pairs.

    The third ``|``-separated field is expected to look like
    ``"<prefix>-x1,y1,x2,y2,...;"``. A trailing unpaired value is ignored.

    :param geo_data: The ``geo`` string returned by Baidu Maps.
    :return: List of ``(x, y)`` float tuples; an empty list when the input is
        empty, has no ``|`` separator, or cannot be parsed.
    """
    if not geo_data or '|' not in geo_data:
        return []
    try:
        # Split on the first "-" only: splitting on every "-" would truncate
        # the list at the first negative coordinate value.
        raw_values = geo_data.split('|')[2].split('-', 1)[1].split(',')
        values = [value.strip(';') for value in raw_values]
        # Pair first, convert second, so an unpaired trailing token is
        # dropped without ever being passed to float().
        return [(float(x), float(y)) for x, y in zip(values[0::2], values[1::2])]
    except (AttributeError, IndexError, TypeError, ValueError) as e:
        print(f"Error parsing geo data: {e}")
        return []
5、将Web Mercator坐标转换为WGS-84经纬度坐标
def web_mercator_to_wgs84(x, y):
    """Convert Web Mercator (EPSG:3857) coordinates to WGS-84 (EPSG:4326).

    :param x: Web Mercator X coordinate (easting).
    :param y: Web Mercator Y coordinate (northing).
    :return: WGS-84 coordinates as ``(lon, lat)``.
    """
    # always_xy=True forces (x, y) / (lon, lat) axis order on both sides.
    # Without it pyproj follows the EPSG:4326 authority order and returns
    # (lat, lon), which contradicts the documented return value.
    # NOTE(review): coordinates returned by Baidu Maps may be in Baidu's own
    # Mercator variant rather than EPSG:3857 - confirm before relying on the
    # converted positions.
    transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
    return transformer.transform(x, y)
6、将数据保存到CSV文件中
def save_to_csv(data, filename="output.csv"):
    """Write boundary coordinates to a CSV file, one row per point.

    The file gets the header ``Residential Name, Longitude, Latitude``
    followed by one row for every ``"lon,lat"`` pair.

    :param data: Dict mapping a residential name to its boundary string
        ``"lon1,lat1;lon2,lat2;..."``. Entries whose value is ``None`` or
        empty (no boundary found) are skipped.
    :param filename: Output file path; missing parent directories are created.
    :raises ValueError: If a non-empty segment is not exactly ``"lon,lat"``.
    """
    # Create the target directory when the path has a directory component.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Residential Name", "Longitude", "Latitude"])
        for name, coords in data.items():
            # A failed boundary lookup yields None; skip it instead of
            # crashing halfway through the file.
            if not coords:
                continue
            for coord in coords.split(';'):
                # A trailing ";" produces an empty segment; ignore it.
                if not coord:
                    continue
                lon, lat = coord.split(',')
                writer.writerow([name, lon, lat])
    print(f"Data saved to {filename}")
7、主函数:bmap_key 填写百度地图 API 密钥,region 填写默认查询地区,input_file 填写存放小区名称的文件路径,output_file 填写输出的 CSV 文件路径。
if __name__ == "__main__":
    # Script configuration. Each assignment sits on its own line: when they
    # share one line, everything after the first "#" is read as a comment and
    # the later settings are never assigned.
    # NOTE(review): only the configuration is shown here; the processing
    # steps that use these values are not part of this excerpt.
    bmap_key = "***"  # replace with your Baidu Maps API key
    region = "北京"  # default region to search in
    input_file = "**.txt"  # text file holding the residential names
    output_file = "transformed_coordinates.csv"  # output CSV file
完整代码下载
https://download.csdn.net/download/cc605523/90592963