3. Create a folder named data and download the sample file from here into it. Divide this file into two parts: twitter_train_dataset.csv and twitter_test_dataset.csv. In the program below we use an 80-20 train-to-test ratio.
Example
import random
import time

import markov
import pandas as pd

# NOTE: the example below also uses CountVectorizer and accuracy_score
# (scikit-learn), Sequential / layers / Dropout (Keras) and
# SingleTagInferenceRecord / RecordCustomMetric (MarkovML SDK); import them
# from your installed packages if they are not already in scope.
# GET PROJECT
# A project for this model already exists with project_id: 4FzUBTJv8f9uPb.
# You can create a new project from the MarkovML SDK or the web UI.
project = markov.Project.from_id("4FzUBTJv8f9uPb")

# GET DATASET
# The train and test segments below are also uploaded to MarkovML under
# dataset id: 3vRT5Ut6mhPqFGc23
train_data_location = "./data/twitter_train_dataset.csv"  # location of your dataset
test_data_location = "./data/twitter_test_dataset.csv"  # location of your dataset

# read data into dataframes
train_df = pd.read_csv(train_data_location, encoding='latin')
test_df = pd.read_csv(test_data_location, encoding='latin')

# concatenate the data to vectorize both together
data = pd.concat([train_df, test_df])

# train a count vectorizer on the tweet text.
# The 'text' column must be passed explicitly: iterating over a DataFrame
# yields its column labels, so fitting on `data` itself would build a
# vocabulary out of the column names instead of the words in the tweets.
tf_vec = CountVectorizer()
tf_vec.fit(data['text'])

x_train, y_train = train_df['text'].values.tolist(), train_df['target'].values
x_test, y_test = test_df['text'].values.tolist(), test_df['target'].values

# transform to vectors
x_train_trans = tf_vec.transform(x_train)
x_test_trans = tf_vec.transform(x_test)

# BUILD MODEL
# Train your MODEL
# The timestamp suffix makes the model name differ between runs.
suffix = int(time.time())
MODEL_NAME = f"Keras Model for Twitter Sentiment Analysis {suffix}"


# build a Keras Network
def _build_model(input_dim):
    """Build and compile the feed-forward network used for the classifier.

    Args:
        input_dim: Number of input features per sample, passed to the first
            Dense layer.

    Returns:
        A compiled Keras ``Sequential`` model: Dense layers of 64, 64, 32 and
        16 ReLU units (dropout of 0.5 after the 32- and 16-unit layers)
        followed by a single sigmoid output unit. It is compiled with binary
        cross-entropy loss, the Adam optimizer and accuracy as the metric.
    """
    model = Sequential()
    model.add(layers.Dense(64, input_dim=input_dim, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(32, activation='relu'))
    # Dropout is taken from `layers` like every other layer here, so the
    # function depends on a single layer namespace.
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model
# Instantiate the network; the input width is the second dimension of the
# vectorized training matrix.
model = _build_model(input_dim=x_train_trans.shape[1])

# TRACK THE TRAINING EXPERIMENT USING markovml
# auto_record is invoked before fit() so that this experiment is captured
markov.keras.auto_record(
    model_class=markov.ModelClass.TAGGING,
    project_id=project.project_id,
    name=MODEL_NAME,
    notes=f"Auto Recording Keras Model with Name: {MODEL_NAME} with Sentence Encoder",
)

# Fit the network (convergence takes a while); progress output is disabled
model.fit(x_train_trans, y_train, batch_size=32, epochs=50, verbose=False)
# EVALUATE THE MODEL
# Predict on the test matrix and keep an untouched copy of the raw outputs
# before they are overwritten with hard labels.
y_pred = model.predict(x_test_trans)
orig_copy = y_pred.tolist()

# Threshold in place at 0.5: strictly above becomes 1, everything at or
# below becomes 0. Both masks are taken from the raw outputs first.
is_positive = y_pred > 0.5
is_negative = y_pred <= 0.5
y_pred[is_positive] = 1
y_pred[is_negative] = 0

# print test accuracy report
acc = accuracy_score(y_test, y_pred)
print("Test accuracy:", acc)
# Register with MarkovML Backend
# Describe this evaluation run, then create and register the recorder that
# will collect the per-sample results.
evaluation_notes = (
    "This model evaluation captures the performance of V1 model"
    " against baseline dataset for sentiment analysis"
)
evaluation_recorder = markov.EvaluationRecorder(
    dataset_id="3vRT5Ut6mhPqFGc23",
    model_id=model.markov_model_id,
    name=f"Sentiment Analysis Keras Model Evaluation {suffix}",
    notes=evaluation_notes,
)
evaluation_recorder.register()
def _get_cost(inferred, actual):
    """Return an illustrative cost for a single prediction.

    Args:
        inferred: Label predicted by the model.
        actual: Ground-truth label.

    Returns:
        0 when ``inferred`` equals ``actual``; otherwise a random integer
        between 2 and 5 (both inclusive).
    """
    if actual == inferred:
        return 0
    # A wrong prediction gets a randomly drawn cost, so values differ
    # from run to run.
    return random.randint(2, 5)
# Add one inference record per test sample to the evaluation recorder.
# enumerate(start=1) supplies the record id, so ids run 1, 2, 3, ...
for urid, (prob, pred, orig, _txt) in enumerate(
    zip(orig_copy, y_pred, y_test, x_test), start=1
):
    # Convert once and reuse; pred and prob are indexed at [0] to get the
    # single value for this sample.
    inferred = float(pred[0])
    actual = float(orig)
    score = float(prob[0])

    mi_record = SingleTagInferenceRecord(
        inferred=inferred,
        actual=actual,
        urid=urid,
        score=score,
        custom_metrics=[
            RecordCustomMetric(label="Cost", value=_get_cost(inferred, actual)),
            RecordCustomMetric(label="Probability", value=score),
        ],
    )
    evaluation_recorder.add_record(mi_record)

# Finish the recording and print what the recorder returns
outcome = evaluation_recorder.finish()
print(outcome)