FOAM API documentation
foam.support_functions
Helpful functions in general. Reading HDF5, processing strings, manipulating dataFrames...
"""Helpful functions in general. Reading HDF5, processing strings, manipulating dataFrames..."""

import logging
import re
from pathlib import Path

import h5py
import pandas as pd

logger = logging.getLogger("logger.sf")


################################################################################
def split_line(line, sep):
    """
    Splits a string in 2 parts around the first occurrence of a separator.

    Parameters
    ----------
    line: string
        String to split in 2.
    sep: string
        Separator where the string has to be split around.

    Returns
    ----------
    head: string
        Part 1 of the string before the separator.
    tail: string
        Part 2 of the string after the separator.

    Raises
    ----------
    ValueError
        If the separator is empty or does not occur in the string.
    """
    head, found, tail = line.partition(sep)
    # partition() gives an empty middle element when the separator is absent.
    # An explicit raise (instead of assert) keeps this check active under `python -O`.
    if not found:
        raise ValueError(f"separator {sep!r} not found in {line!r}")
    return head, tail


################################################################################
def substring(line, sep_first, sep_second):
    """
    Get part of a string between 2 specified separators.
    If second separator is not found, return everything after first separator.

    Parameters
    ----------
    line: string
        String to get substring from.
    sep_first: string
        First separator after which the returned substring should start.
    sep_second: string
        Second separator at which the returned substring should end.

    Returns
    ----------
    head: string
        Part of the string between the 2 separators.

    Raises
    ----------
    ValueError
        If the first separator does not occur in the string, or if a separator is empty.
    """
    _, found, tail = line.partition(sep_first)
    if not found:
        raise ValueError(f"separator {sep_first!r} not found in {line!r}")
    # partition() returns the whole input as its first element when the separator
    # is absent, which is exactly the "everything after first separator" fallback.
    return tail.partition(sep_second)[0]


################################################################################
def get_param_from_filename(file_path, parameters, values_as_float=False):
    """
    Get parameters from filename

    Parameters
    ----------
    file_path : string
        Full path to the file
    parameters: list of string
        Names of parameters to extract from filename
    values_as_float: boolean
        If True, convert the extracted values to float.

    Returns
    ----------
    param_dict: dict
        Keys are strings describing the parameter, values are the corresponding parameter values
        (strings, or floats if values_as_float is True).
        Parameters that could not be extracted or converted are left out and a warning is logged.
    """
    # The stem (file name without directory and final suffix) is identical for every parameter.
    stem = Path(file_path).stem

    param_dict = {}
    for parameter in parameters:
        try:
            value = substring(stem, parameter, "_")
        except (AssertionError, ValueError):
            # AssertionError is still accepted so that helpers signalling a missing
            # separator through an assert keep being handled as "not found".
            logger.warning(
                f"In get_param_from_filename: parameter \"{parameter}\" not found in '{file_path}', value not added"
            )
            continue

        if values_as_float:
            try:
                value = float(value)
            except ValueError:
                logger.warning(
                    f"In get_param_from_filename: value '{value}' of parameter \"{parameter}\" "
                    f"in '{file_path}' is not a valid float, value not added"
                )
                continue

        param_dict[parameter] = value

    return param_dict


################################################################################
def read_hdf5(filename):
    """
    Read a HDF5-format file (e.g. GYRE)

    Parameters
    ----------
    filename : string
        Input file

    Returns
    ----------
    attributes: dict
        Dictionary containing the attributes of the file.
    data: dict
        Dictionary containing the data from the file as numpy arrays.
    """
    with h5py.File(filename, "r") as hdf5_file:
        # Attributes attached to the file itself
        attributes = dict(hdf5_file.attrs.items())
        # Indexing with [...] reads the full content of each entry while the file is open
        data = {name: hdf5_file[name][...] for name in hdf5_file.keys()}
    return attributes, data


################################################################################
def sign(x):
    """
    Returns the sign of a number as a string

    Parameters
    ----------
    x: float or int
        Number to get the sign of.

    Returns
    ----------
    s: string
        "+" if the number equals its absolute value (so also for zero), "-" otherwise.
    """
    return "+" if abs(x) == x else "-"


################################################################################
def get_subgrid_dataframe(file_to_read, fixed_params=None):
    """
    Read a hdf5 file containing the grid information as a pandas dataframe.
    Parameters can be fixed to certain values to filter out entries with other values of that parameter.

    Parameters
    ----------
    file_to_read: string
        path to the file to read
    fixed_params: dict
        keys are parameters to fix to the value specified in the dictionary

    Returns
    ----------
    df: pandas DataFrame
        The content of the file. If fixed_params is given, only the entries matching
        all fixed values are kept and the index is reset.
    """
    grid = pd.read_hdf(file_to_read)
    if fixed_params is None:
        return grid

    for name, required_value in fixed_params.items():
        # Index labels of all entries with another value for this parameter
        mismatching = grid[grid[name] != required_value].index
        grid = grid.drop(index=mismatching)

    return grid.reset_index(drop=True)


################################################################################
def add_surface_to_puls_grid(
    grid_frequencies,
    grid_surface,
    output_name=" grid_surface+freq.hdf",
    grid_parameters=["Z", "M", "logD", "aov", "fov", "Xc"],
):
    """
    Combine the output files with the frequencies and surface info of the grid in one new file,
    only keeping models that have entries in both the grid files.

    Parameters
    ----------
    grid_frequencies: string
        Path to the file containing the model input parameters and corresponding frequencies of the model.
    grid_surface: string
        Path to the file containing the model input parameters and corresponding surface info of the model.
    output_name: string
        Name of the generated file containing the combined info.
    grid_parameters: list of string
        List of the model parameters to use for matching the entries in the freq/surface file.
    """
    # NOTE(review): the default output_name starts with a space, which looks unintentional.
    # It is left untouched here since changing it would change where existing callers find the file.
    frequencies = pd.read_hdf(grid_frequencies)
    surface = pd.read_hdf(grid_surface)

    # Inner merge: surface columns first, frequency columns second, and only
    # the rows whose grid_parameters values occur in both dataFrames.
    merged = pd.merge(surface, frequencies, how="inner", on=grid_parameters)

    # The age is not wanted in the combined file
    merged.pop("age")

    # Move the rotation to the first column and its error to the second column
    for position, column in enumerate(("rot", "rot_err")):
        moved = merged.pop(column)
        merged.insert(position, moved.name, moved)

    # Write the merged dataFrame to a new file, overwriting an existing one
    merged.to_hdf(path_or_buf=output_name, key="pulsation_surface_grid", format="table", mode="w")
def
split_line(line, sep):
def split_line(line, sep):
    """
    Splits a string in 2 parts around the first occurrence of a separator.

    Parameters
    ----------
    line: string
        String to split in 2.
    sep: string
        Separator where the string has to be split around.

    Returns
    ----------
    head: string
        Part 1 of the string before the separator.
    tail: string
        Part 2 of the string after the separator.

    Raises
    ----------
    ValueError
        If the separator is empty or does not occur in the string.
    """
    head, found, tail = line.partition(sep)
    # partition() gives an empty middle element when the separator is absent.
    # An explicit raise (instead of assert) keeps this check active under `python -O`.
    if not found:
        raise ValueError(f"separator {sep!r} not found in {line!r}")
    return head, tail
Splits a string in 2 parts.
Parameters
- line (string): String to split in 2.
- sep (string): Separator where the string has to be split around.
Returns
- head (string): Part 1 of the string before the separator.
- tail (string): Part 2 of the string after the separator.
def
substring(line, sep_first, sep_second):
def substring(line, sep_first, sep_second):
    """
    Get part of a string between 2 specified separators.
    If second separator is not found, return everything after first separator.

    Parameters
    ----------
    line: string
        String to get substring from.
    sep_first: string
        First separator after which the returned substring should start.
    sep_second: string
        Second separator at which the returned substring should end.

    Returns
    ----------
    head: string
        Part of the string between the 2 separators.

    Raises
    ----------
    ValueError
        If the first separator does not occur in the string, or if a separator is empty.
    """
    # The presence of the first separator is checked here directly, so the check
    # does not rely on an assert that disappears under `python -O`.
    _, found, tail = line.partition(sep_first)
    if not found:
        raise ValueError(f"separator {sep_first!r} not found in {line!r}")
    # partition() returns the whole input as its first element when the separator
    # is absent, which is exactly the "everything after first separator" fallback.
    return tail.partition(sep_second)[0]
Get part of a string between 2 specified separators. If second separator is not found, return everything after first separator.
Parameters
- line (string): String to get substring from.
- sep_first (string): First separator after which the returned substring should start.
- sep_second (string): Second separator at which the returned substring should end.
Returns
- head (string): Part of the string between the 2 separators.
def
get_param_from_filename(file_path, parameters, values_as_float=False):
def get_param_from_filename(file_path, parameters, values_as_float=False):
    """
    Get parameters from filename

    Parameters
    ----------
    file_path : string
        Full path to the file
    parameters: list of string
        Names of parameters to extract from filename
    values_as_float: boolean
        If True, convert the extracted values to float.

    Returns
    ----------
    param_dict: dict
        Keys are strings describing the parameter, values are the corresponding parameter values
        (strings, or floats if values_as_float is True).
        Parameters that could not be extracted or converted are left out and a warning is logged.
    """
    # The stem (file name without directory and final suffix) is identical for every parameter.
    stem = Path(file_path).stem

    param_dict = {}
    for parameter in parameters:
        try:
            value = substring(stem, parameter, "_")
        except (AssertionError, ValueError):
            # A missing parameter name surfaces as AssertionError or ValueError from the
            # string helpers. Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
            logger.warning(
                f"In get_param_from_filename: parameter \"{parameter}\" not found in '{file_path}', value not added"
            )
            continue

        if values_as_float:
            try:
                value = float(value)
            except ValueError:
                # Reported separately: the parameter was found, only its value is not numeric.
                logger.warning(
                    f"In get_param_from_filename: value '{value}' of parameter \"{parameter}\" "
                    f"in '{file_path}' is not a valid float, value not added"
                )
                continue

        param_dict[parameter] = value

    return param_dict
Get parameters from filename
Parameters
- file_path (string): Full path to the file
- parameters (list of string): Names of parameters to extract from filename
Returns
- param_dict (dict): Keys are strings describing the parameter, values are the corresponding parameter values (strings by default, floats if values_as_float is True)
def
read_hdf5(filename):
def read_hdf5(filename):
    """
    Read a HDF5-format file (e.g. GYRE)

    Parameters
    ----------
    filename : string
        Input file

    Returns
    ----------
    attributes: dict
        Dictionary containing the attributes of the file.
    data: dict
        Dictionary containing the data from the file as numpy arrays.
    """
    with h5py.File(filename, "r") as hdf5_file:
        # Attributes attached to the file itself
        attributes = dict(hdf5_file.attrs.items())
        # Indexing with [...] reads the full content of each entry while the file is open
        data = {name: hdf5_file[name][...] for name in hdf5_file.keys()}
    return attributes, data
Read a HDF5-format file (e.g. GYRE)
Parameters
- filename (string): Input file
Returns
- attributes (dict): Dictionary containing the attributes of the file.
- data (dict): Dictionary containing the data from the file as numpy arrays.
def
sign(x):
def sign(x):
    """
    Returns the sign of a number as a string

    Parameters
    ----------
    x: float or int
        Number to get the sign of.

    Returns
    ----------
    s: string
        "+" if the number equals its absolute value (so also for zero), "-" otherwise.
    """
    return "+" if abs(x) == x else "-"
Returns the sign of a number as a string
Parameters
- x (float or int): The number whose sign is returned.
Returns
- s (string): A string representing the sign of the number
def
get_subgrid_dataframe(file_to_read, fixed_params=None):
def get_subgrid_dataframe(file_to_read, fixed_params=None):
    """
    Read a hdf5 file containing the grid information as a pandas dataframe.
    Parameters can be fixed to certain values to filter out entries with other values of that parameter.

    Parameters
    ----------
    file_to_read: string
        path to the file to read
    fixed_params: dict
        keys are parameters to fix to the value specified in the dictionary

    Returns
    ----------
    df: pandas DataFrame
        The content of the file. If fixed_params is given, only the entries matching
        all fixed values are kept and the index is reset.
    """
    grid = pd.read_hdf(file_to_read)
    if fixed_params is None:
        return grid

    for name, required_value in fixed_params.items():
        # Index labels of all entries with another value for this parameter
        mismatching = grid[grid[name] != required_value].index
        grid = grid.drop(index=mismatching)

    return grid.reset_index(drop=True)
Read a hdf5 file containing the grid information as a pandas dataframe. Parameters can be fixed to certain values to filter out entries with other values of that parameter.
Parameters
- file_to_read (string): path to the file to read
- fixed_params (dict): keys are parameters to fix to the value specified in the dictionary
Returns
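- df (pandas DataFrame): The grid entries read from the file; when fixed_params is given, only the rows matching every fixed value are kept and the index is reset.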
def
add_surface_to_puls_grid( grid_frequencies, grid_surface, output_name=' grid_surface+freq.hdf', grid_parameters=['Z', 'M', 'logD', 'aov', 'fov', 'Xc']):
def add_surface_to_puls_grid(
    grid_frequencies,
    grid_surface,
    output_name=" grid_surface+freq.hdf",
    grid_parameters=["Z", "M", "logD", "aov", "fov", "Xc"],
):
    """
    Combine the output files with the frequencies and surface info of the grid in one new file,
    only keeping models that have entries in both the grid files.

    Parameters
    ----------
    grid_frequencies: string
        Path to the file containing the model input parameters and corresponding frequencies of the model.
    grid_surface: string
        Path to the file containing the model input parameters and corresponding surface info of the model.
    output_name: string
        Name of the generated file containing the combined info.
    grid_parameters: list of string
        List of the model parameters to use for matching the entries in the freq/surface file.
    """
    # NOTE(review): the default output_name starts with a space, which looks unintentional.
    # It is left untouched here since changing it would change where existing callers find the file.
    frequencies = pd.read_hdf(grid_frequencies)
    surface = pd.read_hdf(grid_surface)

    # Inner merge: surface columns first, frequency columns second, and only
    # the rows whose grid_parameters values occur in both dataFrames.
    merged = pd.merge(surface, frequencies, how="inner", on=grid_parameters)

    # The age is not wanted in the combined file
    merged.pop("age")

    # Move the rotation to the first column and its error to the second column
    for position, column in enumerate(("rot", "rot_err")):
        moved = merged.pop(column)
        merged.insert(position, moved.name, moved)

    # Write the merged dataFrame to a new file, overwriting an existing one
    merged.to_hdf(path_or_buf=output_name, key="pulsation_surface_grid", format="table", mode="w")
Combine the output files with the frequencies and surface info of the grid in one new file, only keeping models that have entries in both the grid files.
Parameters
- grid_frequencies (string): Path to the file containing the model input parameters and corresponding frequencies of the model.
- grid_surface (string): Path to the file containing the model input parameters and corresponding surface info of the model.
- output_name (string): Name of the generated file containing the combined info.
- grid_parameters (list of string): List of the model parameters to use for matching the entries in the freq/surface file.