Skip to content

pelinker.ops

load_dataframe(table_path)

Load a DataFrame from a CSV/TSV file.

Parameters:

Name Type Description Default
table_path Path

Path to the CSV/TSV file (optionally gzipped)

required

Returns:

Type Description
DataFrame

Loaded DataFrame

Source code in pelinker/ops.py
def load_dataframe(table_path: pathlib.Path) -> pd.DataFrame:
    """
    Read a delimited text table from disk into a DataFrame.

    Args:
        table_path: Location of the CSV/TSV file. A leading ``~`` is
            expanded, and a suffix ending in ``.gz`` selects gzip
            decompression.

    Returns:
        The table parsed by ``pd.read_csv``.

    Raises:
        FileNotFoundError: If the expanded path does not exist.
    """
    table_path = table_path.expanduser()
    if not table_path.exists():
        raise FileNotFoundError(f"Input table not found at {table_path}")

    # Tab-separated only when the detector reports "tsv"; everything else is
    # read as comma-separated.
    read_options = {
        "sep": "\t" if _detect_file_format(table_path) == "tsv" else ",",
        "compression": "gzip" if table_path.suffix.endswith(".gz") else None,
    }
    return pd.read_csv(table_path, **read_options)

parse_model_filename(filename, prefix)

Parse a filename like 'res_bert_1.parquet' to extract the model and layer.

Parameters:

Name Type Description Default
filename str

Filename to parse

required
prefix str

Prefix expected at the start of the filename, before the model name (e.g. 'res')

required

Returns:

Name Type Description
tuple tuple[str | None, int | None]

(model, layer) or (None, None) if pattern doesn't match

Source code in pelinker/ops.py
def parse_model_filename(filename: str, prefix: str) -> tuple[str | None, int | None]:
    """
    Parse filename like 'res_bert_1.parquet' to extract model and layer.

    Args:
        filename: Filename to parse
        prefix: parsing prefix

    Returns:
        tuple: (model, layer) or (None, None) if pattern doesn't match
    """
    # Pattern: <prefix]>_<model>_<layer>.parquet
    pattern = rf"{prefix}_([^_]+)_(\d+)\.parquet"
    match = re.match(pattern, filename)
    if match:
        model = match.group(1)
        layer = int(match.group(2))
        return model, layer
    return None, None