合并大量netcdf数据的代码
合并大量NC数据的代码
import xarray as xr
import numpy as np
import glob
import os
from datetime import datetime
from tqdm import tqdm
import dask# Path to your NetCDF files
# Glob pattern selecting the input NetCDF files.
file_path = r'F:\keshan\2016\*.nc'

# Sort the matches so the files are processed in a deterministic
# (lexicographic) order.
nc_files = sorted(glob.glob(file_path))

# Total number of matched files; used for batching and progress messages.
total_files = len(nc_files)
print(f"Processing {total_files} files...")

# Manually create latitude and longitude values: 1800 and 3600 evenly spaced
# points, both end points included.
# NOTE(review): assumes every input file is on an 1800 x 3600 grid stored
# south-to-north / west-to-east -- confirm against the data.
lat_values = np.linspace(-90, 90, 1800)
lon_values = np.linspace(-180, 180, 3600)


# Function to process a single file
def process_file(file):
    """Open one NetCDF file lazily and name its data after the file's timestamp.

    The timestamp is taken from the file name: the extension is dropped, the
    name is split on ``_`` and fields 4-7 (zero-based) are parsed as year,
    month, day and hour.

    Args:
        file: Path to a NetCDF file that contains a ``total_par`` variable
            and ``lat``/``lon`` dimensions.

    Returns:
        The opened dataset, chunked 400 x 400 along ``lat``/``lon``, with its
        ``lat``/``lon`` coordinates replaced by the module-level
        ``lat_values``/``lon_values`` and ``total_par`` renamed to
        ``YYYYmmdd_HH``.

    Raises:
        ValueError: If no ``%Y_%m_%d_%H`` timestamp can be parsed from the
            file name.
    """
    # Extract date from filename. This is done before opening the file so
    # that a badly named file fails without leaving a dataset handle open.
    stem = os.path.splitext(os.path.basename(file))[0]
    date_fields = stem.split('_')[4:8]
    try:
        date = datetime.strptime('_'.join(date_fields), '%Y_%m_%d_%H')
    except ValueError as err:
        raise ValueError(
            f"Cannot extract a %Y_%m_%d_%H timestamp from file name {stem!r}"
        ) from err

    ds = xr.open_dataset(file, chunks={'lat': 400, 'lon': 400})
    ds = ds.assign_coords(lat=('lat', lat_values), lon=('lon', lon_values))

    # Rename 'total_par' to the date
    ds = ds.rename({'total_par': date.strftime('%Y%m%d_%H')})
    return ds


# Process files in batches
batch_size = 10  # Adjust this based on your system's memory
output_path = r'stacked_corrected_dataset.nc'

# Number of batches needed to cover every file (ceiling division); computed
# once because it does not change inside the loop.
total_batches = (total_files + batch_size - 1) // batch_size

for i in range(0, total_files, batch_size):
    batch_number = i // batch_size + 1
    batch_files = nc_files[i:i + batch_size]
    print(f"Processing batch {batch_number}/{total_batches}")

    # Process files with progress bar
    datasets = [
        process_file(file)
        for file in tqdm(batch_files, desc="Processing files")
    ]

    print("Combining datasets in batch...")
    # Combine datasets in the current batch. process_file() renames each
    # file's 'total_par' to its timestamp, so (presumably, given distinct
    # timestamps) the merged dataset holds one variable per input file.
    combined_ds = xr.merge(datasets)

    print(f"Saving batch {batch_number}...")
    # The first batch creates (or overwrites) the output file; every later
    # batch appends its variables to it.
    mode = 'a' if i > 0 else 'w'
    encoding = {
        var: {'zlib': True, 'complevel': 1}
        for var in combined_ds.data_vars
    }

    # Adjust num_workers as needed
    with dask.config.set(scheduler='threads', num_workers=4):
        combined_ds.to_netcdf(output_path, mode=mode, encoding=encoding)

    # Clear memory
    del datasets
    del combined_ds

# Only report success when at least one batch was actually written.
if total_files:
    print(f"Stacked and corrected dataset saved to {output_path}")
else:
    print("No input files found; nothing was written.")
单个nc数据
共计2000多个NC文件:先按文件名中的日期(年月日_时)重命名每个文件的变量,再分批合并到同一个NC文件中。
合并后的nc数据