Source code for suncasa.eovsa.eovsa_flarelist

##ipython eovsa_flarelist.py
##=============
import os
import sys
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from scipy.signal import find_peaks
from datetime import datetime, timedelta
from astropy.time import Time
from astropy.io import fits
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# Global settings and initializations
# EO_WIKI_URL = "http://www.ovsa.njit.edu/wiki/index.php/Recent_Flare_List_(2021-)"
[docs] EO_WIKI_URLs = [ "https://www.ovsa.njit.edu/wiki/index.php/2017", "https://www.ovsa.njit.edu/wiki/index.php/2018", "https://www.ovsa.njit.edu/wiki/index.php/2019", "https://www.ovsa.njit.edu/wiki/index.php/2020", "https://www.ovsa.njit.edu/wiki/index.php/2021", "https://www.ovsa.njit.edu/wiki/index.php/2022", "https://www.ovsa.njit.edu/wiki/index.php/2023", "https://www.ovsa.njit.edu/wiki/index.php/2024", "https://www.ovsa.njit.edu/wiki/index.php/2025", "https://www.ovsa.njit.edu/wiki/index.php/2026" ]
[docs] def fetch_flare_data_from_wiki(eo_wiki_urls, given_date_strp, outcsvfile): """ Fetches flare data from the given wiki URL and saves it to a CSV file in the specified directory. Parameters: - eo_wiki_url: URL of the EO wiki page to fetch data from. - given_date_strp: A tuple of datetime objects specifying the start and end dates to filter the data. - outcsvfile: path to the resulting CSV file. Returns: - None """ date_data = [] time_ut_data = [] flare_class_data = [] flare_ID_data = [] depec_file = [] for eo_wiki_url in eo_wiki_urls: response = requests.get(eo_wiki_url) # Check if the request was successful if response.status_code == 200: # Parse the HTML content of the page using BeautifulSoup soup = BeautifulSoup(response.text, "html.parser") tables = soup.find_all("table", {"class": "wikitable"}) for table in tables: for row in table.find_all("tr"): cells = row.find_all("td") if len(cells) >= 3: date = cells[0].text.strip() time_ut = cells[1].text.strip() flare_class = cells[2].text.strip() datetime_strp = datetime.strptime(date + ' ' + time_ut, '%Y-%m-%d %H:%M') if given_date_strp[0] <= datetime_strp <= given_date_strp[1]: # get Date, Time (UT) and Flare Class date_data.append(date) time_ut_data.append(f"{time_ut}:00") flare_class_data.append(flare_class) # get Flare_ID eotime = f"{date.replace('/', '-')}T{time_ut}:00" flare_ID_data.append(eotime.replace('-', '').replace('T', '').replace(':', '')) print("Fetched: ", eotime) depec_file_tmp = '' for cell in cells: link_cell = cell.find('a', class_='external text', href=True, rel='nofollow') if link_cell: url = link_cell['href'] if url.endswith(".dat") or url.endswith(".fits"): depec_file_tmp = url.split('/')[-1] break # get depec_file depec_file.append(str(depec_file_tmp)) else: print("Failed to retrieve the webpage. Status code:", response.status_code) # reformate the date and time above data = { "ID": np.arange(len(date_data)) + 1, "Flare_ID": flare_ID_data, "Date": date_data, "Time (UT)": time_ut_data, "Flare Class": flare_class_data, "depec_file": depec_file } df = pd.DataFrame(data) df.to_csv(outcsvfile, index=False) print(f"Date and Time (UT) data saved to {outcsvfile}")
[docs] def rd_datfile(file): ''' Read EOVSA binary spectrogram file and return a dictionary with times in Julian Date, frequencies in GHz, and cross-power data in sfu. Return Keys: 'time' Numpy array of nt times in JD format 'fghz' Numpy array of nf frequencies in GHz 'data' Numpy array of size [nf, nt] containing cross-power data Returns empty dictionary ({}) if file size is not compatible with inferred dimensions ''' import struct import numpy as np def dims(file): # Determine time and frequency dimensions (assumes the file has fewer than 10000 times) f = open(file, 'rb') tmp = f.read(83608) # max 10000 times and 451 frequencies f.close() nbytes = len(tmp) tdat = np.array(struct.unpack(str(int(nbytes / 8)) + 'd', tmp[:nbytes])) nt = np.where(tdat < 2400000.)[0] nf = np.where(np.logical_or(tdat[nt[0]:] > 18, tdat[nt[0]:] < 1))[0] return nt[0], nf[0] nt, nf = dims(file) f = open(file, 'rb') tmp = f.read(nt * 8) times = struct.unpack(str(nt) + 'd', tmp) tmp = f.read(nf * 8) fghz = struct.unpack(str(nf) + 'd', tmp) tmp = f.read() f.close() if len(tmp) != nf * nt * 4: print('File size is incorrect for nt=', nt, 'and nf=', nf) return {} data = np.array(struct.unpack(str(nt * nf) + 'f', tmp)).reshape(nf, nt) return {'time': times, 'fghz': fghz, 'data': data}
[docs] def moving_average(data, window_size): # Create a convolution kernel for the moving average kernel = np.ones(window_size) / window_size return np.convolve(data, kernel, mode='valid')
[docs] def get_eoflarelist(timerange=None, work_dir='./', file_out='EOVSA_flare_list_from_wiki_sub.csv'): ''' Get eovsa flarelist from given timerange. On pipeline or ovsa server. Parameters: timerange: Example: final_csv_file = get_eoflarelist(timerange=["2024-01-01 20:00:00", "2024-01-01 21:00:00"]) ''' # Convert timerange strings to datetime objects for internal use timerange_strp = [datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S') for date_str in timerange] print(f"Fetching data for time range {timerange[0]} to {timerange[1]}") ##=============Step 1: Fetching data from wiki============= print("##=============Step 1: capture the radio peak times from flare list wiki webpage") init_csv = os.path.join(work_dir, "get_time_from_wiki_given_date.csv") fetch_flare_data_from_wiki(EO_WIKI_URLs, timerange_strp, init_csv) ##=============Step 2: read the spectrum data============= print("##=============Step 2: read the spectrum data") spec_data_dir = "/common/webplots/events/" # YYYY/ print("Spec data in ", spec_data_dir) df = pd.read_csv(init_csv) flare_id = df['Flare_ID'] depec_file = df['depec_file'] files_wiki = [spec_data_dir + str(flare_id[i])[0:4] + "/" + str(file_name) for i, file_name in enumerate(depec_file)] ##============= tpk_spec_wiki = [] tst_mad_spec_wiki, ted_mad_spec_wiki = [], [] tst_thrd_spec_wiki, ted_thrd_spec_wiki = [], [] depec_file = [] ##============= for ww, file_wiki in enumerate(files_wiki): # len(files_wiki) filename1 = os.path.basename(file_wiki) depec_file.append(filename1) print("Reading spec data: ", filename1) try: if filename1.split('.')[-1] == 'dat': filename = filename1.split('.dat')[0] data1 = rd_datfile(file_wiki) spec = np.array(data1['data']) fghz = np.array(data1['fghz']) time1 = data1['time'] if filename1.split('.')[-1] == 'fits': filename = filename1.split('.fits')[0] eospecfits = fits.open(file_wiki) spec = eospecfits[0].data # [freq, time] fghz = np.array(eospecfits[1].data['FGHZ']) # in GHz time1 = np.array(eospecfits[2].data['TIME']) # in jd format except Exception as e: temp = datetime.strptime(str(flare_id[ww]), "%Y%m%d%H%M%S") temp_st = (temp - timedelta(minutes=2)).strftime("%Y-%m-%d %H:%M:%S") temp_ed = (temp + timedelta(minutes=2)).strftime("%Y-%m-%d %H:%M:%S") tpk_spec_wiki.append(temp.strftime("%Y-%m-%d %H:%M:%S")) tst_mad_spec_wiki.append(temp_st) ted_mad_spec_wiki.append(temp_ed) tst_thrd_spec_wiki.append(temp_st) ted_thrd_spec_wiki.append(temp_ed) print('no data found for:', file_wiki) print('st/ed time will be tpk \u00B1 2mins: ', temp_st, temp_ed) continue time_spec = Time(time1, format='jd') time_str = time_spec.isot ##=============try MAD method tpk_tot_ind = [] tst_tot_ind = [] ted_tot_ind = [] mad_threshd = [] tst_tot_ind_thrd = [] ted_tot_ind_thrd = [] spec = np.nan_to_num(spec, nan=0.0) spec[spec < 0] = 0.01 spec_median = np.median(spec, axis=1, keepdims=True) spec_abs_deviations = np.abs(spec - spec_median) spec_mad = np.median(spec_abs_deviations, axis=1, keepdims=True) mad_threshd = 3.0 * spec_mad outliers = spec_abs_deviations > mad_threshd for ff in range(len(fghz)): good_channel = False flux_array = spec[ff, :] ##=============try MAD method outlier_ind = np.where(outliers[ff])[0] if len(outlier_ind) > 0: tst_tot_ind.append(outlier_ind[0]) ted_tot_ind.append(outlier_ind[-1]) ##=============try to set threshold window_size = 10 y = moving_average(flux_array, window_size) + 0.001 ##flux_array peaks, _ = find_peaks(y, height=1.) noise_thrd_st = np.mean(y[0:5]) noise_thrd_ed = np.mean(y[-5:]) if noise_thrd_st == 0: noise_thrd_st = 0.005 * np.max(y) if noise_thrd_ed == 0: noise_thrd_ed = 0.01 * np.max(y) if np.max(y) / noise_thrd_ed > 100: noise_thrd_ed = 0.02 * np.max(y) # noise_thrd_st = np.max([np.mean(y[0:10]),0.01*np.max(y)]) # noise_thrd_ed = np.max([np.mean(y[-10:])*np.median(y[peaks]),0.01*np.max(y)]) # if ff == 5: # print(np.max(y), np.median(y[peaks]), np.mean(y[0:10]), np.mean(y[-10:])) for ind in range(len(y) - 5): if y[ind] < y[ind + 1] < y[ind + 2] < y[ind + 3] < y[ind + 4] < y[ind + 5]: if y[ind + 5] >= 2 * y[ind]: if all(y[i] > noise_thrd_st for i in range(ind, ind + 6)): tst_tot_ind_thrd.append(ind) break ind_tmp = np.argmax(flux_array) - 30 for ind in range(len(y) - 5): if y[ind] > y[ind + 1] > y[ind + 2] > y[ind + 3] > y[ind + 4] > y[ind + 5]: if y[ind + 3] <= 2 * y[ind]: if all(abs(y[i]) > noise_thrd_ed for i in range(ind, ind + 6)): ind_tmp = ind + 5 ted_tot_ind_thrd.append(ind_tmp) ##=============tpeak tpk_tot_ind.append(np.argmax(flux_array)) time_st_mad = time_spec[int(np.round(np.median(np.array(tst_tot_ind))))] time_ed_mad = time_spec[int(np.round(np.median(np.array(ted_tot_ind))))] time_st_thrd = time_spec[int(np.round(np.median(np.array(tst_tot_ind_thrd))))] time_ed_thrd = time_spec[int(np.round(np.median(np.array(ted_tot_ind_thrd))))] time_st = time_st_thrd time_ed = time_ed_thrd time_pk = time_spec[int(np.median(np.array(tpk_tot_ind)))] time_pk_obj = Time(time_pk, format='jd') tpk_spec_wiki.append(time_pk_obj.strftime('%Y-%m-%d %H:%M:%S')) tst_mad_spec_wiki.append(Time(time_st_mad, format='jd').strftime('%Y-%m-%d %H:%M:%S')) ted_mad_spec_wiki.append(Time(time_ed_mad, format='jd').strftime('%Y-%m-%d %H:%M:%S')) tst_thrd_spec_wiki.append(Time(time_st_thrd, format='jd').strftime('%Y-%m-%d %H:%M:%S')) ted_thrd_spec_wiki.append(Time(time_ed_thrd, format='jd').strftime('%Y-%m-%d %H:%M:%S')) #apply the thrd method EO_tpeak = tpk_spec_wiki EO_tstart_thrd = tst_thrd_spec_wiki EO_tend_thrd = ted_thrd_spec_wiki data_csv = { "ID": np.arange(len(flare_id)) + 1, "Flare_ID": flare_id, "Date": df['Date'], "Time (UT)": df['Time (UT)'], "Flare Class": df['Flare Class'], 'EO_tstart': EO_tstart_thrd, 'EO_tpeak': EO_tpeak, 'EO_tend': EO_tend_thrd, 'depec_file': df['depec_file'] } df = pd.DataFrame(data_csv) final_csv_file = os.path.join(work_dir, file_out) df.to_csv(final_csv_file, index=False) print(f"Success: {len(flare_id)} flares saved to {file_out}") return final_csv_file
[docs] def run_eoflare_pipeline(flarelist_csv=None, to_web=True): ##=========================reading flarelist from .csv========================= import pandas as pd import os import shutil df = pd.read_csv(flarelist_csv) flare_id_tot = df['Flare_ID'] EO_tstart_tot = df['EO_tstart'] EO_tend_tot = df['EO_tend'] for i,j in enumerate(flare_id_tot): print(f"Flare_ID, st, ed time: {flare_id_tot[i]}, {EO_tstart_tot[i]}, {EO_tend_tot[i]}") ##=========================running flare pipeline========================= ans = 'Y' # ans = input('Do you want to continue to run flare pipeline for Flare_IDs above? (say no if you want to stop) [y/n]?') root_dir = os.getcwd() if ans.upper() == 'Y': from suncasa.eovsa import eovsa_flare_pipeline from eovsapy.util import Time for ff, flare_id in enumerate(flare_id_tot):#flare_id_tot, len(flare_id_tot) os.chdir(root_dir) try: EO_tstart = EO_tstart_tot[ff] EO_tend = EO_tend_tot[ff] flare_id = str(flare_id) print(f"To run flare pipeline for Flare_ID: {flare_id}") work_dir = os.path.join(root_dir, flare_id) os.makedirs(work_dir, exist_ok=True) os.chdir(work_dir) trange_str = [EO_tstart, EO_tend] trange = Time(trange_str) fp = eovsa_flare_pipeline.FlareSelfCalib(vis=trange) fp.imaging_start = trange_str[0] fp.imaging_end = trange_str[1] fp.slfcal_pipeline(doselfcal=True, doimaging=True) if to_web: fp.rename_move_files(flare_id=flare_id, fitsdir_web_tp='/data1/eovsa/fits/flares/', movdir_web_tp='/common/webplots/SynopticImg/eovsamedia/eovsa-browser/', msdir_web_tp='/data1/eovsa/fits/flares/', dorename_fits=True, domove_fits=True, dorename_mov=True, domove_mov=True, domove_ms=True, dormworkdir=True, docopy=True) print(f"Success for Flare_ID: {flare_id}") except Exception as e: print(f"Error at ff = {ff} for Flare_id = {flare_id}: {e}") continue os.chdir(root_dir) print('Done!')
# if __name__ == "__main__": # main()