Call submarine.save_model(model, "tensorflow") in your script to save the trained model:
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.keras import layers, models

import submarine


def make_datasets_unbatched():
    BUFFER_SIZE = 10000

    # Scale MNIST pixel values from (0, 255] to (0., 1.].
    def scale(image, label):
        image = tf.cast(image, tf.float32)
        image /= 255
        return image, label

    datasets, _ = tfds.load(name='mnist', with_info=True, as_supervised=True)
    return datasets['train'].map(scale).cache().shuffle(BUFFER_SIZE)


def build_and_compile_cnn_model():
    model = models.Sequential()
    model.add(
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(10, activation='softmax'))
    model.summary()

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def main():
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
        communication=tf.distribute.experimental.CollectiveCommunication.AUTO)

    BATCH_SIZE_PER_REPLICA = 4
    BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync

    with strategy.scope():
        ds_train = make_datasets_unbatched().batch(BATCH_SIZE).repeat()

        options = tf.data.Options()
        options.experimental_distribute.auto_shard_policy = \
            tf.data.experimental.AutoShardPolicy.DATA
        ds_train = ds_train.with_options(options)

        # Model building/compiling needs to happen within `strategy.scope()`.
        multi_worker_model = build_and_compile_cnn_model()

    class MyCallback(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs=None):
            # Log the loss and accuracy of each epoch to Submarine.
            submarine.log_metric({"loss": logs["loss"], "accuracy": logs["accuracy"]}, epoch)

    # `fit()` returns a History object, so pass the trained model itself to save_model.
    multi_worker_model.fit(ds_train, epochs=2, steps_per_epoch=70,
                           callbacks=[MyCallback()])
    submarine.save_model(multi_worker_model, "tensorflow")


if __name__ == '__main__':
    main()
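MultiWorkerMirroredStrategy reads the cluster layout from the standard TF_CONFIG environment variable, which the platform is expected to set for each worker when the script runs as a distributed experiment. As a minimal sketch for smoke-testing the script on a single machine (the host, port, and one-worker topology below are assumptions for local testing, not part of the tutorial), you can set TF_CONFIG yourself before main() builds the strategy:

# Hypothetical single-worker TF_CONFIG for a local smoke test.
# In a real distributed experiment the platform injects TF_CONFIG for every
# worker; the host and port here are placeholders.
import json
import os

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {"worker": ["localhost:12345"]},
    "task": {"type": "worker", "index": 0}
})

On a CPU-only machine with a single worker entry, strategy.num_replicas_in_sync is 1, so the effective batch size stays at BATCH_SIZE_PER_REPLICA and the two training epochs finish quickly.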