64 lines
1.5 KiB
Python
64 lines
1.5 KiB
Python
from __future__ import annotations
|
|
|
|
import sys
|
|
from typing import TYPE_CHECKING
|
|
from argparse import ArgumentParser
|
|
|
|
from .._models import BaseModel
|
|
from ...lib._validators import (
|
|
get_validators,
|
|
write_out_file,
|
|
read_any_format,
|
|
apply_validators,
|
|
apply_necessary_remediation,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from argparse import _SubParsersAction
|
|
|
|
|
|
def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
    """Attach the ``fine_tunes.prepare_data`` sub-command to *subparser*.

    The sub-command accepts:

    * ``-f`` / ``--file`` (required): path of the file to analyze.
    * ``-q`` / ``--quiet`` (optional flag): accept every suggestion
      without prompting.

    The parser defaults ``func`` and ``args_model`` are set to
    ``prepare_data`` and ``PrepareDataArgs`` respectively.
    NOTE(review): presumably the CLI entry point reads these defaults to
    dispatch the command -- confirm against the caller.
    """
    sub = subparser.add_parser("fine_tunes.prepare_data")
    sub.add_argument(
        "-f",
        "--file",
        required=True,
        # Adjacent string literals are concatenated verbatim, so the
        # separating space has to be written explicitly; without it the
        # help text rendered as "...to be analyzed.This should be...".
        help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing prompt-completion examples to be analyzed. "
        "This should be the local file path.",
    )
    sub.add_argument(
        "-q",
        "--quiet",
        required=False,
        action="store_true",
        help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
    )
    sub.set_defaults(func=prepare_data, args_model=PrepareDataArgs)
|
|
|
|
|
|
class PrepareDataArgs(BaseModel):
    """Arguments of the ``fine_tunes.prepare_data`` sub-command."""

    # Value of ``-f`` / ``--file``: path of the file to analyze.
    file: str

    # Value of ``-q`` / ``--quiet``: accept suggestions without prompting.
    quiet: bool
|
|
|
|
|
|
def prepare_data(args: PrepareDataArgs) -> None:
    """Run the data-preparation checks on ``args.file``.

    Steps, in order:

    1. Write ``"Analyzing..."`` to standard output.
    2. Load the file with ``read_any_format``, which yields a data frame
       and a remediation object.
    3. Pass that remediation to ``apply_necessary_remediation`` (with
       ``None`` as the first argument).
    4. Fetch the validators and run ``apply_validators`` over the loaded
       data, handing it ``write_out_file`` as the output writer.

    ``args.quiet`` is forwarded to ``apply_validators`` as the
    auto-accept flag.
    """
    sys.stdout.write("Analyzing...\n")

    source_path, accept_all = args.file, args.quiet

    frame, initial_remediation = read_any_format(source_path)
    apply_necessary_remediation(None, initial_remediation)

    checks = get_validators()

    # NOTE(review): presumably the remediation step above already aborts
    # when loading failed, making this a type-narrowing guard -- confirm.
    assert frame is not None

    apply_validators(
        frame,
        source_path,
        initial_remediation,
        checks,
        accept_all,
        write_out_file_func=write_out_file,
    )
|