From 9ece6666bfa59af58c000ea5640c7afd0ea2249e Mon Sep 17 00:00:00 2001
From: joshua stein
Date: Wed, 7 Nov 2012 21:58:10 -0600
Subject: [PATCH] add stupid temporary hack to strip out utf8mb4 chars that are
 screwing up mysql

4-byte utf8 chars like emoji are passed around in ruby fine, but when
they are put into mysql queries, strings get truncated at the first
mb4 character.

to prevent truncation, strip out mb4 characters in most
user-controlled fields like comments, story descriptions and titles,
and messages.

to properly support utf8mb4, mysql server 5.5 is needed, the table
encodings need to be changed to utf8mb4, and the mysql2 gem needs to
be upgraded once it supports utf8mb4:

https://github.com/brianmario/mysql2/issues/249
---
 app/models/comment.rb |  3 ++-
 app/models/message.rb | 10 ++++++++++
 app/models/story.rb   |  8 +++++++-
 lib/monkey.rb         | 19 +++++++++++++++++++
 4 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/app/models/comment.rb b/app/models/comment.rb
index 52217bf..f1a43ff 100644
--- a/app/models/comment.rb
+++ b/app/models/comment.rb
@@ -235,7 +235,8 @@ class Comment < ActiveRecord::Base
   end
 
   def comment=(com)
-    self[:comment] = com.to_s.rstrip
+    # TODO: remove remove_mb4 hack
+    self[:comment] = com.to_s.rstrip.remove_mb4
     self.markeddown_comment = self.generated_markeddown_comment
   end
 
diff --git a/app/models/message.rb b/app/models/message.rb
index 49e68a7..84a5aef 100644
--- a/app/models/message.rb
+++ b/app/models/message.rb
@@ -76,6 +76,16 @@ class Message < ActiveRecord::Base
       errors.add(:recipient_username, "is not a valid user")
     end
   end
+
+  # TODO: remove remove_mb4 hack
+  def body=(b)
+    self[:body] = b.to_s.remove_mb4
+  end
+
+  # TODO: remove remove_mb4 hack
+  def subject=(s)
+    self[:subject] = s.to_s.remove_mb4
+  end
 
   def linkified_body
     Markdowner.to_html(self.body)
diff --git a/app/models/story.rb b/app/models/story.rb
index c06b075..734e049 100644
--- a/app/models/story.rb
+++ b/app/models/story.rb
@@ -313,9 +313,15 @@ class Story < ActiveRecord::Base
     self[:url] = u
   end
 
+  # TODO: remove remove_mb4 hack
+  def description=(d)
+    self[:description] = d.to_s.remove_mb4
+  end
+
   def title=(t)
     # change unicode whitespace characters into real spaces
-    self[:title] = t.strip
+    # TODO: remove remove_mb4 hack
+    self[:title] = t.to_s.strip.remove_mb4
   end
 
   def title_as_url
diff --git a/lib/monkey.rb b/lib/monkey.rb
index 644c1f8..ae0e00f 100644
--- a/lib/monkey.rb
+++ b/lib/monkey.rb
@@ -9,3 +9,22 @@ module ActiveRecord
     end
   end
 end
+
+# XXX stupid hack to strip out utf8mb4 chars that may break mysql queries
+# TODO upgrade to mysql 5.5, convert tables to utf8mb4, upgrade mysql2 gem when
+# it supports utf8mb4, and remove this hack
+class String
+  def remove_mb4
+    t = "".force_encoding(self.encoding)
+
+    self.each_char do |c|
+      if c.bytesize == 4
+        t << " "
+      else
+        t << c
+      end
+    end
+
+    t
+  end
+end