Source code for suncasa.eovsa.eovsa_IDBfiledownloader

from astropy.time import Time
import os
import requests
from bs4 import BeautifulSoup

'''
Example usage:

    t = Time(['2023-05-09T18:35:19', '2023-05-09T18:46:00'])
    files = eovsa_filedownloader(t, outpath='/home/surajit')
    print(files)
'''

def get_times_from_web(link):
    """Return the timestamps of the IDB entries listed on a directory page.

    The page at ``link`` is fetched and every anchor whose ``href`` contains
    ``"IDB" + year`` is kept, where ``year`` is the first four characters of
    the last non-empty path component of ``link``.

    Parameters
    ----------
    link : str
        URL of the directory listing to scan.  Its last path component is
        expected to start with a four-digit year (callers in this module
        pass ``<base>/YYYYMMDD/``).

    Returns
    -------
    list of str
        One entry per matching anchor: the ``href`` with its first three
        characters removed (the ``IDB`` prefix when the href starts with
        it).  When ``link`` ends with ``/`` the last character of each
        ``href`` is dropped as well (presumably the trailing slash of a
        directory entry).

    Raises
    ------
    requests.exceptions.RequestException
        If the page cannot be fetched, including when the server does not
        respond within the timeout.
    """
    pieces = link.split('/')
    ends_with_slash = pieces[-1] == ''
    ymd = pieces[-2] if ends_with_slash else pieces[-1]
    marker = "IDB" + ymd[:4]

    # A timeout keeps an unresponsive server from blocking the caller forever.
    page = requests.get(link, timeout=60).text
    soup = BeautifulSoup(page, 'html.parser')

    # Anchors without an href attribute yield None from Tag.get(); they must
    # be skipped before the substring test, which would raise TypeError.
    hrefs = (node.get('href') for node in soup.find_all('a'))
    matching = [href for href in hrefs if href is not None and marker in href]

    # Drop the 3-character prefix, plus the final character when the link
    # itself ended with a slash.
    stop = -1 if ends_with_slash else None
    return [href[3:stop] for href in matching]
def _is_idb_timestamp(text):
    """Return True if ``text`` is exactly 14 ASCII digits (YYYYMMDDHHMMSS)."""
    return len(text) == 14 and all(ch in "0123456789" for ch in text)


def eovsa_filedownloader(trange, outpath='./'):
    """Download the IDB files whose timestamps fall inside a time range.

    The per-day directory listing(s) covering ``trange`` are read with
    ``get_times_from_web``; every listed entry whose timestamp lies between
    ``trange[0]`` and ``trange[1]`` (inclusive) is fetched with ``wget -r``
    and moved into ``outpath``.

    Parameters
    ----------
    trange : astropy.time.Time
        Two-element time array ``[start, end]``.  The two times may fall on
        different calendar dates only if they are at most one day apart.
        The object is not modified.
    outpath : str, optional
        Directory that receives the downloaded entries.  NOTE: the process
        working directory is changed to ``outpath`` and is not restored
        (behaviour kept from the original implementation).

    Returns
    -------
    list of str
        Sorted names (``IDB<YYYYMMDDHHMMSS>``, not full paths) of all listed
        entries inside the range, or an empty list if there are none.  A
        name is included whether or not its download succeeded; the exit
        status of ``wget`` is not checked.

    Raises
    ------
    RuntimeError
        If the two times are on different dates and more than one day apart.
    OSError
        If the ``wget`` executable cannot be started.
    """
    # Imported locally so the module's import block does not need to change.
    import shutil
    import subprocess

    start, stop = trange[0], trange[1]
    first_day = start.strftime("%Y%m%d")
    last_day = stop.strftime("%Y%m%d")

    # The archive location is selected from the first date only.
    if '20170400' <= first_day <= '20210400':
        link = 'https://research.ssl.berkeley.edu/data/eovsa/'
    elif '20210300' <= first_day <= '20221231':
        link = 'http://ovsa.njit.edu/IDB2/'
    else:
        link = 'http://www.ovsa.njit.edu/fits/IDB/'

    if first_day == last_day:
        days = [first_day]
    else:
        # Time - Time is a TimeDelta; .jd is its length in days.
        if abs((stop - start).jd) > 1:
            raise RuntimeError("The supplied two times differby more than 1 day!!!")
        days = [first_day, last_day]

    # Pass 1: read every listing and remember which entries are in range.
    wanted = []
    for day in days:
        # Listing entries come from a remote page; keep only well-formed
        # timestamps so nothing unexpected ends up in a URL or a path.
        stamps = [s for s in get_times_from_web(link + day + '/')
                  if _is_idb_timestamp(s)]
        if not stamps:
            # Time() cannot be built from an empty list.
            continue
        times = Time(["{}-{}-{}T{}:{}:{}".format(s[:4], s[4:6], s[6:8],
                                                 s[8:10], s[10:12], s[12:])
                      for s in stamps])
        in_range = (times >= start) & (times <= stop)
        wanted.extend((day, s) for s, keep in zip(stamps, in_range) if keep)

    # Pass 2: download.  The directory is entered once, so a relative
    # outpath is resolved a single time even when two days are processed.
    os.chdir(outpath)

    # wget -r recreates "<host>/<path>/..." below the working directory.
    mirror_prefix = link.split('//')[1]
    mirror_root = mirror_prefix.split('/')[0]

    files = []
    for day, stamp in wanted:
        filename = 'IDB' + stamp
        # Argument list instead of a shell string: no shell ever parses
        # text that originated from the remote listing.
        subprocess.run(["wget", "-r", "-np", "-e", "robots=off",
                        "-R", "index.html",
                        link + day + "/" + filename + "/"])
        fetched = mirror_prefix + day + '/' + filename
        # An entry of the same name already present in outpath is left alone.
        if os.path.exists(fetched) and not os.path.exists(filename):
            shutil.move(fetched, filename)
        # Remove the mirrored host directory wget created.
        shutil.rmtree(mirror_root, ignore_errors=True)
        files.append(filename)

    files.sort()
    return files