Get Echo Data from the Pacific Hake Survey#

This notebook uses EK60 echosounder data collected during the 2017 Joint U.S.-Canada Integrated Ecosystem and Pacific Hake Acoustic Trawl Survey (‘Pacific Hake Survey’) to illustrate a common workflow for data conversion, calibration, and visualization using echopype and echoshader.

Cloud-hosted .raw data files spanning one month of the survey (July 24 to August 24, 2017) are accessed by echopype directly from an Amazon Web Services (AWS) S3 “bucket” maintained by the NOAA NCEI Water-Column Sonar Data Archive. The selection totals 365 .raw files of approximately 25 MB each (1 Hz pinging rate from first light to dusk), or roughly 9 GB. With echopype, each file is converted to a standardized representation.
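To see what that standardized representation looks like before committing to the full batch, a single file can be opened on its own. The sketch below is illustrative rather than part of the pipeline: it simply grabs the first .raw file the glob returns and assumes anonymous S3 access works from your environment.

import fsspec
import echopype as ep

fs = fsspec.filesystem("s3", anon=True)
first_raw = fs.glob(
    "ncei-wcsd-archive/data/raw/Bell_M._Shimada/SH1707/EK60/*.raw"
)[0]
ed = ep.open_raw(
    f"s3://{first_raw}", sonar_model="EK60", storage_options={"anon": True}
)
print(ed)  # EchoData object organized into SONAR-netCDF4-style groups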

Establish an AWS S3 file system connection and process S3-hosted raw files with echopype#

We encourage importing echopype as ep for consistency.

from pathlib import Path
import datetime as dt

import fsspec
import xarray as xr

import echopype as ep

# Analysis parameters used throughout this notebook
data_type = 'Hake'
range_meter_bin = 0.2  # vertical bin size in meters
ping_time_bin = '2s'   # ping-time bin as a pandas offset string
start_datetime = dt.datetime(2017, 7, 24, 0, 0)
end_datetime = dt.datetime(2017, 8, 24, 23, 59)

# Output directory named after the analysis parameters
base_dpath = Path(
    f"./{data_type}_range={range_meter_bin}_ping={ping_time_bin}"
    f"_start={start_datetime.strftime('%Y-%m-%d-%H-%M')}"
    f"_end={end_datetime.strftime('%Y-%m-%d-%H-%M')}"
)
base_dpath.mkdir(exist_ok=True)

# Subdirectories for calibrated MVBS output and platform coordinates
calibrated_dpath = base_dpath / 'HakeSurvey'
calibrated_dpath.mkdir(exist_ok=True)

lon_dpath = base_dpath / 'Lon'
lon_dpath.mkdir(exist_ok=True)

lat_dpath = base_dpath / 'Lat'
lat_dpath.mkdir(exist_ok=True)

# Anonymous (publicly readable) access to the NCEI archive bucket
fs = fsspec.filesystem('s3', anon=True)

bucket = "ncei-wcsd-archive"
rawdirpath = "data/raw/Bell_M._Shimada/SH1707/EK60"
s3rawfiles = fs.glob(f"{bucket}/{rawdirpath}/*.raw")

# Keep only raw files whose names fall within the analysis period;
# filenames embed the recording date as D2017MMDD
date_list = [
    (start_datetime + dt.timedelta(days=x)).strftime("%m%d")
    for x in range((end_datetime - start_datetime).days + 1)
]
s3rawfiles = [
    s3path for s3path in s3rawfiles
    if any(f"D2017{datestr}" in s3path for datestr in date_list)
]

print(f"There are {len(s3rawfiles)} target raw files available")
for s3rawfpath in s3rawfiles:
    raw_fpath = Path(s3rawfpath)
    try:
        # Access file directly from S3 to create a converted EchoData object in memory
        ed = ep.open_raw(
            f"s3://{s3rawfpath}",
            sonar_model='EK60',
            storage_options={'anon': True}
        )

        # Use the EchoData object "ed" to compute calibrated Sv and
        # MVBS, saving the MVBS result to netCDF
        ds_Sv = ep.calibrate.compute_Sv(ed)
        
        ds_MVBS = ep.commongrid.compute_MVBS(
            ds_Sv,
            range_meter_bin=range_meter_bin,  # bin size in meters
            ping_time_bin=ping_time_bin  # pandas offset string, e.g. '2s'
        )
        
        ds_MVBS.to_netcdf(calibrated_dpath / f"MVBS_{raw_fpath.stem}.nc")

        # Save the Platform-group longitude and latitude so they can be
        # interpolated onto the MVBS ping_time grid later
        ds_lon = ed['Platform'].longitude
        ds_lon.to_netcdf(lon_dpath / f"MVBS_{raw_fpath.stem}.nc")

        ds_lat = ed['Platform'].latitude
        ds_lat.to_netcdf(lat_dpath / f"MVBS_{raw_fpath.stem}.nc")

        print(f"MVBS_{raw_fpath.stem}.nc created")

    except Exception as e:
        print(f"Failed to process raw file {raw_fpath.name}: {e}")

Combine MVBS with Geographic Coordinates#

# Lazily open and combine the per-file MVBS outputs along ping_time
MVBS_ds = xr.open_mfdataset(
    str(calibrated_dpath / 'MVBS_*.nc'),
    data_vars='minimal', coords='minimal',
    combine='by_coords'
)

longitude = xr.open_mfdataset(
    str(lon_dpath / '*.nc'),
    data_vars='minimal', coords='minimal',
    combine='by_coords'
)

latitude = xr.open_mfdataset(
    str(lat_dpath / '*.nc'),
    data_vars='minimal', coords='minimal',
    combine='by_coords'
)

lon = longitude["longitude"]

lat = latitude["latitude"]

lon=lon.interp(time1=MVBS_ds["ping_time"])
lat=lat.interp(time1=MVBS_ds["ping_time"])

MVBS_ds["longitude"]=lon
MVBS_ds["latitude"]=lat

# Record how the geographic coordinates were derived
history = (
    f"{dt.datetime.utcnow()} +00:00. "
    "Interpolated from Platform latitude/longitude."
)
MVBS_ds["latitude"] = MVBS_ds["latitude"].assign_attrs({"history": history})
MVBS_ds["longitude"] = MVBS_ds["longitude"].assign_attrs({"history": history})

MVBS_ds
MVBS_ds.to_netcdf(base_dpath / "concatenated_MVBS.nc")
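
With the combined MVBS saved, a quick static echogram confirms the pipeline end to end. The intro mentioned interactive visualization with echoshader; the sketch below instead uses xarray's built-in matplotlib plotting, assuming the Sv variable and channel/ping_time/echo_range dimension names produced by the echopype version used above, and with typical (not survey-specific) Sv color limits:

# Quick-look echogram for the first channel; yincrease=False puts the
# sea surface at the top of the plot
MVBS_ds["Sv"].isel(channel=0).plot(
    x="ping_time", y="echo_range",
    yincrease=False, vmin=-80, vmax=-30,  # typical Sv display range (dB)
)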