Updating the input image sizes to use a higher resolution. Currently, with unclean data, accuracy reaches only 70%.
This commit is contained in:
Executable
+467
@@ -0,0 +1,467 @@
|
||||
// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#import "CameraExampleViewController.h"

#import <AssertMacros.h>
#import <AssetsLibrary/AssetsLibrary.h>
#import <CoreImage/CoreImage.h>
#import <ImageIO/ImageIO.h>

#include <sys/time.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <queue>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/contrib/lite/kernels/register.h"
#include "tensorflow/contrib/lite/model.h"
#include "tensorflow/contrib/lite/mutable_op_resolver.h"
#include "tensorflow/contrib/lite/string_util.h"
|
||||
|
||||
// Minimal logging shim: every severity (INFO/ERROR/FATAL) expands to plain
// std::cerr. NOTE: LOG(FATAL) therefore does NOT abort the program —
// execution continues after the message is printed.
#define LOG(x) std::cerr

// If you have your own model, modify this to the file name, and make sure
// you've added the file to your app resources too.
static NSString* model_file_name = @"graph";
static NSString* model_file_type = @"lite";

// If you have your own model, point this to the labels file.
// One label per line; runModel indexes this list with the model's output
// tensor index, so file order must match the model's output order.
static NSString* labels_file_name = @"labels";
static NSString* labels_file_type = @"txt";

// These dimensions need to match those the model was trained with.
static const int wanted_input_width = 224;
static const int wanted_input_height = 224;
static const int wanted_input_channels = 3;
|
||||
|
||||
// Resolves a bundled resource name/extension pair to an absolute filesystem
// path. Logs on a missing resource (LOG is only std::cerr, so this does not
// abort) and returns nil in that case.
static NSString* FilePathForResourceName(NSString* name, NSString* extension) {
  NSString* resolved = [[NSBundle mainBundle] pathForResource:name ofType:extension];
  if (!resolved) {
    LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "." << [extension UTF8String]
               << "' in bundle.";
  }
  return resolved;
}
|
||||
|
||||
// Loads the newline-separated labels file from the app bundle into
// |label_strings|, one entry per non-empty line, preserving file order.
// On a missing or unreadable file the vector is left untouched and an
// error is logged.
static void LoadLabels(NSString* file_name, NSString* file_type,
                       std::vector<std::string>* label_strings) {
  NSString* labels_path = FilePathForResourceName(file_name, file_type);
  if (!labels_path) {
    LOG(ERROR) << "Failed to find model proto at" << [file_name UTF8String]
               << [file_type UTF8String];
    // LOG is only std::cerr, so return explicitly: [nil UTF8String] yields
    // NULL, and passing NULL to ifstream::open is undefined behavior.
    return;
  }
  std::ifstream t([labels_path UTF8String]);
  if (!t.is_open()) {
    LOG(ERROR) << "Failed to open labels file " << [labels_path UTF8String];
    return;
  }
  // getline-in-condition stops cleanly at EOF without processing the final
  // failed read.
  std::string line;
  while (std::getline(t, line)) {
    if (!line.empty()) {
      label_strings->push_back(line);
    }
  }
}
|
||||
|
||||
// Returns the top N confidence values over threshold in the provided vector,
// sorted by confidence in descending order.
//
// |prediction| points at |prediction_size| scores; the winning scores are
// appended to |top_results| as (score, index) pairs.
static void GetTopN(const float* prediction, const int prediction_size, const int num_results,
                    const float threshold, std::vector<std::pair<float, int>>* top_results) {
  // Min-heap: the smallest surviving score sits on top, so capping the heap
  // at |num_results| keeps exactly the N largest values seen so far.
  std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>,
                      std::greater<std::pair<float, int>>>
      top_result_pq;

  for (int i = 0; i < prediction_size; ++i) {
    const float value = prediction[i];
    // Only add it if it beats the threshold and has a chance at being in
    // the top N.
    if (value < threshold) {
      continue;
    }

    top_result_pq.push(std::pair<float, int>(value, i));

    // If at capacity, kick the smallest value out. The explicit cast avoids
    // a signed/unsigned comparison between size_t and int.
    if (top_result_pq.size() > static_cast<size_t>(num_results)) {
      top_result_pq.pop();
    }
  }

  // Copy to output vector and reverse into descending order.
  while (!top_result_pq.empty()) {
    top_results->push_back(top_result_pq.top());
    top_result_pq.pop();
  }
  std::reverse(top_results->begin(), top_results->end());
}
|
||||
|
||||
// Forward declaration of internal teardown so earlier methods (e.g. dealloc)
// can call it before its definition appears in the implementation below.
@interface CameraExampleViewController (InternalMethods)
- (void)teardownAVCapture;
@end
|
||||
|
||||
@implementation CameraExampleViewController
|
||||
|
||||
// Creates the photo preview layer, installs it over the preview view, and
// kicks off the first photo load/classification pass.
- (void)attachPreviewLayer {
  // Reset the photo-library cursor so UpdatePhoto refetches from scratch.
  photos_index = 0;
  photos = nil;

  previewLayer = [[CALayer alloc] init];
  previewLayer.backgroundColor = [UIColor blackColor].CGColor;

  CALayer* hostLayer = [previewView layer];
  hostLayer.masksToBounds = YES;
  previewLayer.frame = hostLayer.bounds;
  [hostLayer addSublayer:previewLayer];

  [self UpdatePhoto];
}
|
||||
|
||||
// Advances to the next photo in the library (refetching and rewinding when
// exhausted), renders it to the preview layer, and feeds the resized copy
// through the TFLite model.
- (void)UpdatePhoto {
  if (photos == nil || photos_index >= photos.count) {
    [self updatePhotosLibrary];
    photos_index = 0;
  }

  if (photos.count) {
    PHAsset* nextAsset = photos[photos_index];
    photos_index += 1;
    // Small aspect-fill copy for the network; full-size aspect-fit copy for
    // on-screen display.
    input_image = [self convertImageFromAsset:nextAsset
                                   targetSize:CGSizeMake(wanted_input_width, wanted_input_height)
                                         mode:PHImageContentModeAspectFill];
    display_image = [self convertImageFromAsset:nextAsset
                                     targetSize:CGSizeMake(nextAsset.pixelWidth, nextAsset.pixelHeight)
                                           mode:PHImageContentModeAspectFit];
    [self DrawImage];
  }

  if (input_image != nil) {
    image_data pixels = [self CGImageToPixels:input_image.CGImage];
    [self inputImageToModel:pixels];
    [self runModel];
  }
}
|
||||
|
||||
// Draws display_image aspect-fit, centered on a fixed 600x800 canvas, then
// stores the rendered bitmap back into display_image and shows it in the
// preview layer.
- (void)DrawImage {
  // Without an image there is nothing to render; bail out rather than divide
  // by a zero width below.
  if (display_image == nil) {
    return;
  }

  CGFloat view_height = 800;
  CGFloat view_width = 600;

  UIGraphicsBeginImageContextWithOptions(CGSizeMake(view_width, view_height), NO, 0.0);
  CGContextRef context = UIGraphicsGetCurrentContext();
  UIGraphicsPushContext(context);

  // Aspect-fit: scale to the canvas width, then shrink further if the
  // height still overflows.
  float scale = view_width / display_image.size.width;
  if (display_image.size.height * scale > view_height) {
    scale = view_height / display_image.size.height;
  }

  // Center the scaled image on the canvas.
  CGPoint origin = CGPointMake((view_width - display_image.size.width * scale) / 2.0f,
                               (view_height - display_image.size.height * scale) / 2.0f);
  [display_image drawInRect:CGRectMake(origin.x, origin.y,
                                       display_image.size.width * scale,
                                       display_image.size.height * scale)];
  UIGraphicsPopContext();
  display_image = UIGraphicsGetImageFromCurrentImageContext();
  UIGraphicsEndImageContext();
  previewLayer.contents = (id)display_image.CGImage;
}
|
||||
|
||||
// Detaches the preview layer from the layer tree; called from dealloc.
- (void)teardownAVCapture {
  [previewLayer removeFromSuperlayer];
}
|
||||
|
||||
// Refetches every image asset in the photo library, oldest first, into the
// |photos| fetch result.
- (void)updatePhotosLibrary {
  NSSortDescriptor* byCreationDate = [NSSortDescriptor sortDescriptorWithKey:@"creationDate"
                                                                   ascending:YES];
  PHFetchOptions* options = [[PHFetchOptions alloc] init];
  options.sortDescriptors = @[ byCreationDate ];
  photos = [PHAsset fetchAssetsWithMediaType:PHAssetMediaTypeImage options:options];
}
|
||||
|
||||
// Synchronously requests |asset| rendered at |targetSize| with the given
// content mode. Returns nil when Photos supplies no image (e.g. an
// iCloud-only asset or a denied permission) instead of crashing on an
// empty result array.
- (UIImage *)convertImageFromAsset:(PHAsset *)asset
                        targetSize:(CGSize)targetSize
                              mode:(PHImageContentMode)mode {
  PHImageManager* manager = [[PHImageManager alloc] init];
  PHImageRequestOptions* options = [[PHImageRequestOptions alloc] init];
  // Synchronous delivery guarantees the result handler has run by the time
  // requestImageForAsset: returns.
  options.synchronous = TRUE;

  NSMutableArray* images = [[NSMutableArray alloc] init];
  [manager requestImageForAsset:asset
                     targetSize:targetSize
                    contentMode:mode
                        options:options
                  resultHandler:^(UIImage* image, NSDictionary* info) {
                    // The handler may deliver a nil image; inserting nil
                    // into an NSMutableArray would throw.
                    if (image != nil) {
                      [images addObject:image];
                    }
                  }];

  // firstObject is nil-safe, unlike images[0] which throws on an empty array.
  return images.firstObject;
}
|
||||
|
||||
// Maps a UIDeviceOrientation onto the corresponding video orientation. The
// raw values line up except that the two landscape cases are mirrored
// between the enums, so those are swapped explicitly.
- (AVCaptureVideoOrientation)avOrientationForDeviceOrientation:
    (UIDeviceOrientation)deviceOrientation {
  switch (deviceOrientation) {
    case UIDeviceOrientationLandscapeLeft:
      return AVCaptureVideoOrientationLandscapeRight;
    case UIDeviceOrientationLandscapeRight:
      return AVCaptureVideoOrientationLandscapeLeft;
    default:
      return (AVCaptureVideoOrientation)deviceOrientation;
  }
}
|
||||
|
||||
// Decodes |image| into a tightly packed RGBA byte buffer (8 bits per
// channel, premultiplied alpha, big-endian byte order). On failure the
// returned buffer is correctly sized but zero-filled.
- (image_data)CGImageToPixels:(CGImage *)image {
  image_data result;
  result.width = (int)CGImageGetWidth(image);
  result.height = (int)CGImageGetHeight(image);
  result.channels = 4;

  const int bytes_per_row = (result.width * result.channels);
  const int bytes_in_image = (bytes_per_row * result.height);
  // std::vector value-initializes, so unwritten pixels stay zero.
  result.data = std::vector<uint8_t>(bytes_in_image);
  const int bits_per_component = 8;

  CGColorSpaceRef color_space = CGColorSpaceCreateDeviceRGB();
  CGContextRef context =
      CGBitmapContextCreate(result.data.data(), result.width, result.height, bits_per_component,
                            bytes_per_row, color_space,
                            kCGImageAlphaPremultipliedLast | kCGBitmapByteOrder32Big);
  CGColorSpaceRelease(color_space);
  if (context != NULL) {
    CGContextDrawImage(context, CGRectMake(0, 0, result.width, result.height), image);
    CGContextRelease(context);
  } else {
    // CGBitmapContextCreate fails for zero-sized or unsupported formats;
    // drawing into or releasing a NULL context would only log CG errors.
    LOG(ERROR) << "Failed to create bitmap context for image conversion.";
  }

  return result;
}
|
||||
|
||||
|
||||
|
||||
// IBAction wired to the UI: advances to the next photo and reruns the model.
- (IBAction)takePicture:(id)sender {
  [self UpdatePhoto];
}
|
||||
|
||||
// Copies |image| into the interpreter's float input tensor, resizing via
// nearest-neighbour sampling to wanted_input_width x wanted_input_height and
// normalizing bytes [0, 255] to floats in [-1, 1].
- (void)inputImageToModel:(image_data)image {
  float* tensor = interpreter->typed_input_tensor<float>(0);

  const float input_mean = 127.5f;
  const float input_std = 127.5f;
  assert(image.channels >= wanted_input_channels);
  uint8_t* src = image.data.data();

  for (int out_y = 0; out_y < wanted_input_height; ++out_y) {
    // Nearest-neighbour: pick the proportional source row/column.
    const int src_y = (out_y * image.height) / wanted_input_height;
    uint8_t* src_row = src + (src_y * image.width * image.channels);
    float* dst_row = tensor + (out_y * wanted_input_width * wanted_input_channels);
    for (int out_x = 0; out_x < wanted_input_width; ++out_x) {
      const int src_x = (out_x * image.width) / wanted_input_width;
      uint8_t* src_pixel = src_row + (src_x * image.channels);
      float* dst_pixel = dst_row + (out_x * wanted_input_channels);
      for (int c = 0; c < wanted_input_channels; ++c) {
        dst_pixel[c] = (src_pixel[c] - input_mean) / input_std;
      }
    }
  }
}
|
||||
|
||||
// Runs one inference pass on whatever is currently in the input tensor,
// logs per-call and running-average latency, and pushes the top predictions
// to the UI on the main queue.
- (void)runModel {
  double startTimestamp = [[NSDate new] timeIntervalSince1970];
  if (interpreter->Invoke() != kTfLiteOk) {
    LOG(FATAL) << "Failed to invoke!";
  }
  double endTimestamp = [[NSDate new] timeIntervalSince1970];
  total_latency += (endTimestamp - startTimestamp);
  total_count += 1;
  NSLog(@"Time: %.4lf, avg: %.4lf, count: %d", endTimestamp - startTimestamp,
        total_latency / total_count, total_count);

  // One output score per label; keep the best few above the threshold.
  const int output_size = (int)labels.size();
  const int kNumResults = 5;
  const float kThreshold = 0.1f;

  float* output = interpreter->typed_output_tensor<float>(0);
  std::vector<std::pair<float, int>> top_results;
  GetTopN(output, output_size, kNumResults, kThreshold, &top_results);

  // Translate tensor indices back into human-readable label strings.
  std::vector<std::pair<float, std::string>> newValues;
  for (const auto& scored : top_results) {
    newValues.push_back(std::make_pair(scored.first, labels[scored.second]));
  }
  dispatch_async(dispatch_get_main_queue(), ^(void) {
    [self setPredictionValues:newValues];
  });
}
|
||||
|
||||
// ARC handles memory; dealloc only detaches the preview layer.
- (void)dealloc {
  [self teardownAVCapture];
}
|
||||
|
||||
// No caches to purge here; defer entirely to UIKit's default handling.
- (void)didReceiveMemoryWarning {
  [super didReceiveMemoryWarning];
}
|
||||
|
||||
// Loads the TFLite model and labels from the bundle, builds the interpreter,
// and starts the photo preview. Because LOG(FATAL) in this file is only
// std::cerr (it does not abort), each failure path returns early instead of
// falling through and dereferencing a null model/interpreter.
- (void)viewDidLoad {
  [super viewDidLoad];
  labelLayers = [[NSMutableArray alloc] init];

  NSString* graph_path = FilePathForResourceName(model_file_name, model_file_type);
  model = tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String]);
  if (!model) {
    LOG(FATAL) << "Failed to mmap model " << graph_path;
    return;
  }
  LOG(INFO) << "Loaded model " << graph_path;
  model->error_reporter();
  LOG(INFO) << "resolved reporter";

  tflite::ops::builtin::BuiltinOpResolver resolver;
  LoadLabels(labels_file_name, labels_file_type, &labels);

  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
  if (!interpreter) {
    LOG(FATAL) << "Failed to construct interpreter";
    return;
  }
  if (interpreter->AllocateTensors() != kTfLiteOk) {
    LOG(FATAL) << "Failed to allocate tensors!";
    return;
  }

  [self attachPreviewLayer];
}
|
||||
|
||||
// Boilerplate view-lifecycle overrides: nothing extra to do beyond super.

- (void)viewDidUnload {
  [super viewDidUnload];
}

- (void)viewWillAppear:(BOOL)animated {
  [super viewWillAppear:animated];
}

- (void)viewDidAppear:(BOOL)animated {
  [super viewDidAppear:animated];
}

- (void)viewWillDisappear:(BOOL)animated {
  [super viewWillDisappear:animated];
}

- (void)viewDidDisappear:(BOOL)animated {
  [super viewDidDisappear:animated];
}
|
||||
|
||||
// Portrait-only UI. (This override is deprecated since iOS 6; kept for
// compatibility with older SDK targets.)
- (BOOL)shouldAutorotateToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation {
  return (interfaceOrientation == UIInterfaceOrientationPortrait);
}
|
||||
|
||||
// Hide the status bar so the full screen is available for the preview.
- (BOOL)prefersStatusBarHidden {
  return YES;
}
|
||||
|
||||
// Renders up to five prediction rows ("NN%  Label") as CATextLayer pairs
// down the left edge of the view, replacing any rows from the previous run.
- (void)setPredictionValues:(std::vector<std::pair<float, std::string>>)newValues {
  const float leftMargin = 10.0f;
  const float topMargin = 10.0f;

  const float valueWidth = 48.0f;
  const float valueHeight = 18.0f;

  const float labelWidth = 246.0f;
  const float labelHeight = 18.0f;

  const float labelMarginX = 5.0f;
  const float labelMarginY = 5.0f;

  [self removeAllLabelLayers];

  int row = 0;
  for (const auto& prediction : newValues) {
    const float confidence = prediction.first;
    const std::string& labelText = prediction.second;
    const float rowTop = topMargin + ((labelHeight + labelMarginY) * row);

    // Left column: the confidence as a whole percentage, right-aligned.
    const int percent = (int)roundf(confidence * 100.0f);
    NSString* valueText = [NSString stringWithFormat:@"%d%%", percent];
    [self addLabelLayerWithText:valueText
                        originX:leftMargin
                        originY:rowTop
                          width:valueWidth
                         height:valueHeight
                      alignment:kCAAlignmentRight];

    // Right column: the capitalized label name, left-aligned.
    NSString* name = [NSString stringWithCString:labelText.c_str()
                                        encoding:[NSString defaultCStringEncoding]];
    [self addLabelLayerWithText:[name capitalizedString]
                        originX:(leftMargin + valueWidth + labelMarginX)
                        originY:rowTop
                          width:labelWidth
                         height:labelHeight
                      alignment:kCAAlignmentLeft];

    row += 1;
    if (row > 4) {
      break;
    }
  }
}
|
||||
|
||||
// Detaches every previously added prediction layer and forgets it.
- (void)removeAllLabelLayers {
  for (CATextLayer* textLayer in labelLayers) {
    [textLayer removeFromSuperlayer];
  }
  [labelLayers removeAllObjects];
}
|
||||
|
||||
// Adds one prediction row: a semi-transparent rounded black background layer
// plus a white Menlo text layer inset inside it. Both layers are appended to
// |labelLayers| so removeAllLabelLayers can clear them later.
- (void)addLabelLayerWithText:(NSString*)text
                      originX:(float)originX
                      originY:(float)originY
                        width:(float)width
                       height:(float)height
                    alignment:(NSString*)alignment {
  CFTypeRef font = (CFTypeRef) @"Menlo-Regular";
  const float fontSize = 12.0;
  const float marginSizeX = 5.0f;
  const float marginSizeY = 2.0f;

  const CGRect backgroundBounds = CGRectMake(originX, originY, width, height);
  // Inset the text so it does not touch the rounded background edges.
  const CGRect textBounds = CGRectMake((originX + marginSizeX), (originY + marginSizeY),
                                       (width - (marginSizeX * 2)), (height - (marginSizeY * 2)));

  CATextLayer* background = [CATextLayer layer];
  background.backgroundColor = [UIColor blackColor].CGColor;
  background.opacity = 0.5f;
  background.frame = backgroundBounds;
  background.cornerRadius = 5.0f;
  [[self.view layer] addSublayer:background];
  [labelLayers addObject:background];

  CATextLayer* textLayer = [CATextLayer layer];
  textLayer.foregroundColor = [UIColor whiteColor].CGColor;
  textLayer.frame = textBounds;
  textLayer.alignmentMode = alignment;
  textLayer.wrapped = YES;
  [textLayer setFont:font];
  textLayer.fontSize = fontSize;
  // Match the screen scale so text renders crisply on Retina displays.
  textLayer.contentsScale = [[UIScreen mainScreen] scale];
  textLayer.string = text;
  [[self.view layer] addSublayer:textLayer];
  [labelLayers addObject:textLayer];
}
|
||||
|
||||
@end
|
||||
Reference in New Issue
Block a user