Home Installation Walkthrough Pipeline modules Pipeline configuration Plotting tools Community guidelines

FAOM API documentation


Helpful functions in general. Reading HDF5, processing strings, manipulating dataFrames...

  1"""Helpful functions in general. Reading HDF5, processing strings, manipulating dataFrames..."""
  3import logging
  4import re
  5from pathlib import Path
  7import h5py
  8import pandas as pd
 10logger = logging.getLogger("logger.sf")
 14def split_line(line, sep):
 15    """
 16    Splits a string in 2 parts.
 18    Parameters
 19    ----------
 20    line: string
 21        String to split in 2.
 22    sep: string
 23        Separator where the string has to be split around.
 25    Returns
 26    ----------
 27    head: string
 28        Part 1 of the string before the separator.
 29    tail: string
 30        Part 2 of the string after the separator.
 31    """
 32    head, sep_, tail = line.partition(sep)
 33    assert sep_ == sep
 34    return head, tail
 38def substring(line, sep_first, sep_second):
 39    """
 40    Get part of a string between 2 specified separators.
 41    If second separator is not found, return everything after first separator.
 43    Parameters
 44    ----------
 45    line: string
 46        String to get substring from.
 47    sep_first: string
 48        First separator after which the returned substring should start.
 49    sep_first: string
 50        Second separator at which the returned substring should end.
 52    Returns
 53    ----------
 54    head: string
 55        Part of the string between the 2 separators.
 56    """
 57    head, tail = split_line(line, sep=sep_first)
 58    if sep_second not in tail:
 59        return tail
 60    head, tail = split_line(tail, sep=sep_second)
 61    return head
 65def get_param_from_filename(file_path, parameters, values_as_float=False):
 66    """
 67    Get parameters from filename
 69    Parameters
 70    ----------
 71    file_path : string
 72        Full path to the file
 73    parameters: list of string
 74        Names of parameters to extract from filename
 76    Returns
 77    ----------
 78    param_dict: dict
 79        Keys are strings describing the parameter, values are strings giving corresponding parameter values
 80    """
 82    param_dict = {}
 83    for parameter in parameters:
 84        try:
 85            p = substring(Path(file_path).stem, parameter, "_")
 86            if values_as_float:
 87                p = float(p)
 88            param_dict[parameter] = p
 89        except:
 90            logger.warning(
 91                f"In get_param_from_filename: parameter \"{parameter}\" not found in '{file_path}', value not added"
 92            )
 94    return param_dict
 98def read_hdf5(filename):
 99    """
100    Read a HDF5-format file (e.g. GYRE)
102    Parameters
103    ----------
104    filename : string
105        Input file
107    Returns
108    ----------
109    attributes: dict
110        Dictionary containing the attributes of the file.
111    data: dict
112        Dictionary containing the data from the file as numpy arrays.
113    """
114    # Open the file
115    with h5py.File(filename, "r") as file:
116        # Read attributes
117        attributes = dict(zip(file.attrs.keys(), file.attrs.values()))
118        # Read datasets
119        data = {}
120        for k in file.keys():
121            data[k] = file[k][...]
122    return attributes, data
def sign(x):
    """
    Give the sign of a number as a one-character string.

    Parameters
    ----------
    x: float or int
        Number whose sign is requested.

    Returns
    ----------
    s: string
        "+" if the number equals its own absolute value (this includes zero),
        "-" otherwise.
    """
    return "+" if abs(x) == x else "-"
def get_subgrid_dataframe(file_to_read, fixed_params=None):
    """
    Read a hdf5 file containing the grid information as a pandas dataframe.
    Parameters can be fixed to certain values to filter out entries with other values of that parameter.

    Parameters
    ----------
    file_to_read: string
        path to the file to read
    fixed_params: dict
        keys are parameters to fix to the value specified in the dictionary.
        If None, the file content is returned unfiltered with its stored index.

    Returns
    ----------
    df: pandas DataFrame
        The entries whose columns equal all the fixed values. When fixed_params
        is given, the index is reset to a fresh 0..n-1 range.
    """
    df = pd.read_hdf(file_to_read)

    if fixed_params is None:
        return df

    for param, value in fixed_params.items():
        # Select with a boolean mask instead of dropping by index label:
        # label-based drop also removes matching rows that happen to share an
        # index label with a non-matching row.
        df = df[df[param] == value]

    return df.reset_index(drop=True)
def add_surface_to_puls_grid(
    grid_frequencies,
    grid_surface,
    output_name=" grid_surface+freq.hdf",
    grid_parameters=None,
):
    """
    Combine the output files with the frequencies and surface info of the grid in one new file,
    only keeping models that have entries in both the grid files.

    Parameters
    ----------
    grid_frequencies: string
        Path to the file containing the model input parameters and corresponding frequencies of the model.
    grid_surface: string
        Path to the file containing the model input parameters and corresponding surface info of the model.
    output_name: string
        Name of the generated file containing the combined info.
    grid_parameters: list of string
        List of the model parameters to use for matching the entries in the freq/surface file.
        Defaults to ["Z", "M", "logD", "aov", "fov", "Xc"] when None.

    Raises
    ----------
    KeyError
        If the merged data has no "age", "rot" or "rot_err" column.
    """
    # NOTE(review): the default output_name starts with a space, which looks
    # accidental; it is left unchanged so the default output location stays the same.

    # Build the default list per call instead of sharing one mutable default object.
    if grid_parameters is None:
        grid_parameters = ["Z", "M", "logD", "aov", "fov", "Xc"]

    freq_df = pd.read_hdf(grid_frequencies)
    surface_df = pd.read_hdf(grid_surface)
    # Merge with surface info first, freq info second. Only keeping rows that both dataFrames have in common based on the 'on' columns.
    df_merged = pd.merge(surface_df, freq_df, how="inner", on=grid_parameters)

    df_merged.pop("age")  # Don't add the age in the combined file

    # take the column with rotation and place it as the first column, and its error as second column
    for position, column in enumerate(("rot", "rot_err")):
        df_merged.insert(position, column, df_merged.pop(column))

    # write the merged dataFrame to a new file
    df_merged.to_hdf(path_or_buf=output_name, key="pulsation_surface_grid", format="table", mode="w")
def split_line(line, sep):
    """
    Split a string in 2 parts around the first occurrence of a separator.

    Parameters
    ----------
    line: string
        String to split in 2.
    sep: string
        Separator where the string has to be split around.

    Returns
    ----------
    head: string
        Part 1 of the string before the separator.
    tail: string
        Part 2 of the string after the separator.

    Raises
    ----------
    AssertionError
        If the separator does not occur in the string.
    ValueError
        If the separator is an empty string (raised by str.partition).
    """
    head, found_sep, tail = line.partition(sep)
    # Raise explicitly rather than via an assert statement: asserts are removed
    # when Python runs with -O, which would silently return (line, "") here.
    # AssertionError is kept as the exception type so existing callers that
    # catch it keep working.
    if found_sep != sep:
        raise AssertionError(f"separator {sep!r} not found in {line!r}")
    return head, tail

Splits a string in 2 parts.

  • line (string): String to split in 2.
  • sep (string): Separator where the string has to be split around.
  • head (string): Part 1 of the string before the separator.
  • tail (string): Part 2 of the string after the separator.
def substring(line, sep_first, sep_second):
    """
    Get part of a string between 2 specified separators.
    If second separator is not found, return everything after first separator.

    Parameters
    ----------
    line: string
        String to get substring from.
    sep_first: string
        First separator after which the returned substring should start.
    sep_second: string
        Second separator at which the returned substring should end.

    Returns
    ----------
    head: string
        Part of the string between the 2 separators.

    Raises
    ----------
    AssertionError
        If the first separator does not occur in the string.
    ValueError
        If either separator is an empty string (raised by str.partition).
    """
    _, found_first, remainder = line.partition(sep_first)
    # Explicit raise instead of an assert statement, so the check is not
    # stripped when Python runs with -O. The exception type is unchanged.
    if found_first != sep_first:
        raise AssertionError(f"separator {sep_first!r} not found in {line!r}")
    # str.partition returns the whole string as its first element when the
    # separator is absent, which covers the "second separator not found" case.
    between, _, _ = remainder.partition(sep_second)
    return between

Get part of a string between 2 specified separators. If second separator is not found, return everything after first separator.

  • line (string): String to get substring from.
  • sep_first (string): First separator after which the returned substring should start.
  • sep_second (string): Second separator at which the returned substring should end.
  • head (string): Part of the string between the 2 separators.
def get_param_from_filename(file_path, parameters, values_as_float=False):
    """
    Get parameters from filename

    For each requested parameter, the value is the text in the file stem
    (file name without directory and final suffix) that follows the parameter
    name, up to the next "_" or the end of the stem.

    Parameters
    ----------
    file_path : string
        Full path to the file
    parameters: list of string
        Names of parameters to extract from filename
    values_as_float: boolean
        If True, convert the extracted values to float; values that cannot be
        converted are skipped with a warning.

    Returns
    ----------
    param_dict: dict
        Keys are strings describing the parameter, values are strings (or floats
        if values_as_float is True) giving corresponding parameter values.
        Parameters that could not be extracted are left out.
    """
    # The stem does not depend on the parameter, so compute it once.
    stem = Path(file_path).stem

    param_dict = {}
    for parameter in parameters:
        try:
            value = substring(stem, parameter, "_")
        # Only catch the failures of the lookup itself; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        except (AssertionError, ValueError, TypeError):
            logger.warning(
                f"In get_param_from_filename: parameter \"{parameter}\" not found in '{file_path}', value not added"
            )
            continue

        if values_as_float:
            try:
                value = float(value)
            except ValueError:
                # The parameter was found, so report the conversion problem
                # instead of claiming that it is missing.
                logger.warning(
                    f"In get_param_from_filename: value '{value}' of parameter \"{parameter}\" "
                    f"in '{file_path}' is not a valid float, value not added"
                )
                continue

        param_dict[parameter] = value

    return param_dict

Get parameters from filename

  • file_path (string): Full path to the file
  • parameters (list of string): Names of parameters to extract from filename
  • param_dict (dict): Keys are strings describing the parameter, values are strings giving corresponding parameter values
def read_hdf5(filename):
    """
    Load the attributes and top-level members of a HDF5 file (e.g. GYRE output).

    Parameters
    ----------
    filename : string
        Path of the HDF5 file to read.

    Returns
    ----------
    attributes: dict
        The file-level attributes, keyed by attribute name.
    data: dict
        The content of each top-level member of the file, keyed by member name.
    """
    # The context manager closes the file again once everything is in memory.
    with h5py.File(filename, "r") as hdf:
        attributes = dict(hdf.attrs.items())
        # [...] reads the complete content of each member into memory.
        # NOTE(review): assumes every top-level key is a dataset - confirm.
        data = {name: hdf[name][...] for name in hdf.keys()}
    return attributes, data

Read a HDF5-format file (e.g. GYRE)

  • filename (string): Input file
  • attributes (dict): Dictionary containing the attributes of the file.
  • data (dict): Dictionary containing the data from the file as numpy arrays.
def sign(x):
    """
    Give the sign of a number as a one-character string.

    Parameters
    ----------
    x: float or int
        Number whose sign is requested.

    Returns
    ----------
    s: string
        "+" if the number equals its own absolute value (this includes zero),
        "-" otherwise.
    """
    return "+" if abs(x) == x else "-"

Returns the sign of a number as a string

  • x (float or int): Number whose sign is returned.
  • s (string): A string representing the sign of the number
def get_subgrid_dataframe(file_to_read, fixed_params=None):
    """
    Read a hdf5 file containing the grid information as a pandas dataframe.
    Parameters can be fixed to certain values to filter out entries with other values of that parameter.

    Parameters
    ----------
    file_to_read: string
        path to the file to read
    fixed_params: dict
        keys are parameters to fix to the value specified in the dictionary.
        If None, the file content is returned unfiltered with its stored index.

    Returns
    ----------
    df: pandas DataFrame
        The entries whose columns equal all the fixed values. When fixed_params
        is given, the index is reset to a fresh 0..n-1 range.
    """
    df = pd.read_hdf(file_to_read)

    if fixed_params is None:
        return df

    for param, value in fixed_params.items():
        # Select with a boolean mask instead of dropping by index label:
        # label-based drop also removes matching rows that happen to share an
        # index label with a non-matching row.
        df = df[df[param] == value]

    return df.reset_index(drop=True)

Read a hdf5 file containing the grid information as a pandas dataframe. Parameters can be fixed to certain values to filter out entries with other values of that parameter.

  • file_to_read (string): path to the file to read
  • fixed_params (dict): keys are parameters to fix to the value specified in the dictionary
  • df (pandas DataFrame): The grid entries that match all fixed parameter values.
def add_surface_to_puls_grid(
    grid_frequencies,
    grid_surface,
    output_name=" grid_surface+freq.hdf",
    grid_parameters=None,
):
    """
    Combine the output files with the frequencies and surface info of the grid in one new file,
    only keeping models that have entries in both the grid files.

    Parameters
    ----------
    grid_frequencies: string
        Path to the file containing the model input parameters and corresponding frequencies of the model.
    grid_surface: string
        Path to the file containing the model input parameters and corresponding surface info of the model.
    output_name: string
        Name of the generated file containing the combined info.
    grid_parameters: list of string
        List of the model parameters to use for matching the entries in the freq/surface file.
        Defaults to ["Z", "M", "logD", "aov", "fov", "Xc"] when None.

    Raises
    ----------
    KeyError
        If the merged data has no "age", "rot" or "rot_err" column.
    """
    # NOTE(review): the default output_name starts with a space, which looks
    # accidental; it is left unchanged so the default output location stays the same.

    # Build the default list per call instead of sharing one mutable default object.
    if grid_parameters is None:
        grid_parameters = ["Z", "M", "logD", "aov", "fov", "Xc"]

    freq_df = pd.read_hdf(grid_frequencies)
    surface_df = pd.read_hdf(grid_surface)
    # Merge with surface info first, freq info second. Only keeping rows that both dataFrames have in common based on the 'on' columns.
    df_merged = pd.merge(surface_df, freq_df, how="inner", on=grid_parameters)

    df_merged.pop("age")  # Don't add the age in the combined file

    # take the column with rotation and place it as the first column, and its error as second column
    for position, column in enumerate(("rot", "rot_err")):
        df_merged.insert(position, column, df_merged.pop(column))

    # write the merged dataFrame to a new file
    df_merged.to_hdf(path_or_buf=output_name, key="pulsation_surface_grid", format="table", mode="w")

Combine the output files with the frequencies and surface info of the grid in one new file, only keeping models that have entries in both the grid files.

  • grid_frequencies (string): Path to the file containing the model input parameters and corresponding frequencies of the model.
  • grid_surface (string): Path to the file containing the model input parameters and corresponding surface info of the model.
  • output_name (string): Name of the generated file containing the combined info.
  • grid_parameters (list of string): List of the model parameters to use for matching the entries in the freq/surface file.