LabAssignment2Report
CS5542 Big Data Analytics and App Lab Assignment #2 Report
Zakari, Abdulmuhaymin (29)
This is the lab assignment report on generating captions for a set of images using the Show and Tell model.

main(): takes the input files from the project's paths. These files are the frozen model, the vocabulary file, and the input images.
```python
# Required libraries
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import math
import os

import nltk
import nltk.translate.gleu_score as gleu
from nltk.translate.bleu_score import sentence_bleu
import numpy
import tensorflow as tf

from main.caption_generator import CaptionGenerator
from main.model import ShowAndTellModel
from main.vocabulary import Vocabulary

# Download the punkt tokenizer once if it is not already installed.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

FLAGS = tf.flags.FLAGS

tf.flags.DEFINE_string("model_path", r"..\show-and-tell.pb", "Model path")
tf.flags.DEFINE_string("vocab_file", r"..\word_counts.txt", "Vocabulary file")
tf.flags.DEFINE_string("input_files", r"..\in3.jpg", "Input image file pattern(s)")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main(_):
    model = ShowAndTellModel(FLAGS.model_path)
    vocab = Vocabulary(FLAGS.vocab_file)
    filenames = _load_filenames()

    # Human caption used as the hypothesis when scoring.
    can1 = "a table with different kinds of food"
    candidate = can1.split()

    generator = CaptionGenerator(model, vocab)

    for filename in filenames:
        with tf.gfile.GFile(filename, "rb") as f:
            image = f.read()
        captions = generator.beam_search(image)
        print("Captions: ")
        for i, caption in enumerate(captions):
            # Strip the <S> and </S> tokens before joining.
            sentence = [vocab.id_to_token(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            temp = " %d) %s (p=%f)" % (i + 1, sentence, math.exp(caption.logprob))
            print(temp)

            comp = [sentence.split()]

            # Calculating the BLEU score
            print('BLEU cumulative 1-gram: %f' % sentence_bleu(comp, candidate, weights=(1, 0, 0, 0)))
            print('BLEU cumulative 2-gram: %f' % sentence_bleu(comp, candidate, weights=(0.5, 0.5, 0, 0)))

            # GLEU score
            G = gleu.sentence_gleu(comp, candidate, min_len=1, max_len=2)
            print("GLEU score for this sentence: {}".format(G))


def _load_filenames():
    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    logger.info("Running caption generation on %d files matching %s",
                len(filenames), FLAGS.input_files)
    return filenames


if __name__ == "__main__":
    tf.app.run()
```
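The scoring lines above compare the model's caption against the human caption using NLTK's sentence-level BLEU and GLEU. Note that in main() the generated caption is passed as the reference list (comp) and the fixed human caption (candidate) as the hypothesis. As a sanity check, here is a minimal, self-contained sketch with made-up sentences (not output from the assignment) showing how those calls behave:

```python
# Toy sanity check for the scoring calls used in main(); sentences are made up.
from nltk.translate.bleu_score import sentence_bleu
import nltk.translate.gleu_score as gleu

reference = [["a", "table", "with", "plates", "of", "food"]]  # list of reference token lists
hypothesis = ["a", "table", "with", "different", "kinds", "of", "food"]

# Cumulative 1-gram BLEU: unigram precision only.
print(sentence_bleu(reference, hypothesis, weights=(1, 0, 0, 0)))
# Cumulative 2-gram BLEU: geometric mean of 1-gram and 2-gram precisions.
print(sentence_bleu(reference, hypothesis, weights=(0.5, 0.5, 0, 0)))
# GLEU over 1- and 2-grams: the minimum of n-gram precision and recall.
print(gleu.sentence_gleu(reference, hypothesis, min_len=1, max_len=2))
```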
The model file:
```python
import logging
import os

import tensorflow as tf


class ShowAndTellModel(object):

    def __init__(self, model_path):
        self._model_path = model_path
        self.logger = logging.getLogger(__name__)
        self._load_model(model_path)
        self._sess = tf.Session(graph=tf.get_default_graph())

    def _load_model(self, frozen_graph_path):
        # Import the frozen GraphDef into the default graph.
        model_exp = os.path.expanduser(frozen_graph_path)
        if os.path.isfile(model_exp):
            self.logger.info('Loading model filename: %s' % model_exp)
            with tf.gfile.FastGFile(model_exp, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                tf.import_graph_def(graph_def, name='')
        else:
            raise RuntimeError("Missing model file at path: {}".format(frozen_graph_path))

    def feed_image(self, encoded_image):
        # Run the image through the CNN encoder to get the initial LSTM state.
        initial_state = self._sess.run(fetches="lstm/initial_state:0",
                                       feed_dict={"image_feed:0": encoded_image})
        return initial_state

    def inference_step(self, input_feed, state_feed):
        # One LSTM decoding step: word probabilities plus the new state.
        softmax_output, state_output = self._sess.run(
            fetches=["softmax:0", "lstm/state:0"],
            feed_dict={
                "input_feed:0": input_feed,
                "lstm/state_feed:0": state_feed,
            })
        return softmax_output, state_output, None
```
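feed_image() and inference_step() are the only two operations CaptionGenerator needs: the first encodes the image into the LSTM's initial state, and the second advances the decoder by one word. Below is a hypothetical usage sketch; the file names are placeholders, and it assumes the frozen graph exposes the tensor names hard-coded above ("image_feed:0", "softmax:0", etc.), which is how the im2txt export names them.

```python
# Hypothetical sketch: one greedy decoding step driven by hand.
import numpy as np
import tensorflow as tf

from main.model import ShowAndTellModel
from main.vocabulary import Vocabulary

model = ShowAndTellModel("show-and-tell.pb")   # placeholder paths
vocab = Vocabulary("word_counts.txt")

with tf.gfile.GFile("in3.jpg", "rb") as f:
    encoded_image = f.read()

# Encode the image into the decoder's initial LSTM state.
state = model.feed_image(encoded_image)

# Feed the start token, then greedily pick the most likely next word.
softmax, state, _ = model.inference_step(np.array([vocab.start_id]), state)
next_word = vocab.id_to_token(int(np.argmax(softmax[0])))
```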
The vocabulary file (vocabulary.py):
```python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os


class Vocabulary(object):

    def __init__(self,
                 vocab_file_path,
                 start_token="<S>",
                 end_token="</S>",
                 unk_token="<UNK>"):
        self.logger = logging.getLogger(__name__)

        if not os.path.exists(vocab_file_path):
            self.logger.exception("Vocab file %s not found.", vocab_file_path)
            raise RuntimeError
        self.logger.info("Initializing vocabulary from file: %s", vocab_file_path)

        # Each line starts with the token; any following fields are ignored.
        with open(vocab_file_path, mode="r") as f:
            reverse_vocab = list(f.readlines())
        reverse_vocab = [line.split()[0] for line in reverse_vocab]
        assert start_token in reverse_vocab
        assert end_token in reverse_vocab
        if unk_token not in reverse_vocab:
            reverse_vocab.append(unk_token)
        vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
        self.logger.info("Created vocabulary with %d words" % len(vocab))

        self.vocab = vocab                  # token -> id
        self.reverse_vocab = reverse_vocab  # id -> token
        self.start_id = vocab[start_token]
        self.end_id = vocab[end_token]
        self.unk_id = vocab[unk_token]

    def token_to_id(self, token):
        # Unknown tokens map to the <UNK> id.
        if token in self.vocab:
            return self.vocab[token]
        else:
            return self.unk_id

    def id_to_token(self, token_id):
        # Out-of-range ids map back to the <UNK> token.
        if token_id >= len(self.reverse_vocab):
            return self.reverse_vocab[self.unk_id]
        else:
            return self.reverse_vocab[token_id]
```
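The constructor expects each line of the vocabulary file to begin with the token itself (the `word_counts.txt` format is "word count"), and both lookup methods fall back to `<UNK>`. A quick round-trip sketch with a tiny made-up vocab file:

```python
# Hypothetical round-trip test with a tiny vocab file.
from main.vocabulary import Vocabulary

with open("tiny_word_counts.txt", "w") as f:
    f.write("<S> 10\n</S> 10\na 9\ntable 5\nfood 4\n")

vocab = Vocabulary("tiny_word_counts.txt")
print(vocab.token_to_id("table"))  # 3
print(vocab.token_to_id("zebra"))  # 5, the <UNK> id ("zebra" is not in the file)
print(vocab.id_to_token(2))        # "a"
```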
Screenshots: