Skip to content

Commit 84b728c

Browse files
committed
optimized: added stream file processing
1 parent 410328b commit 84b728c

File tree

4 files changed

+96
-137
lines changed

4 files changed

+96
-137
lines changed

Gemfile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
source "https://rubygems.org"
22
gem "ruby-prof"
3-
# gem "rspec-benchmark"
4-
gem "ruby-progressbar"
3+
gem "rspec-benchmark"
54
gem "minitest"
65
gem "memory_profiler"
76
gem "stackprof"

Gemfile.lock

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,31 @@
11
GEM
22
remote: https://rubygems.org/
33
specs:
4+
benchmark-malloc (0.2.0)
5+
benchmark-perf (0.6.0)
6+
benchmark-trend (0.4.0)
7+
diff-lcs (1.6.0)
48
memory_profiler (1.1.0)
59
minitest (5.25.4)
10+
rspec (3.13.0)
11+
rspec-core (~> 3.13.0)
12+
rspec-expectations (~> 3.13.0)
13+
rspec-mocks (~> 3.13.0)
14+
rspec-benchmark (0.6.0)
15+
benchmark-malloc (~> 0.2)
16+
benchmark-perf (~> 0.6)
17+
benchmark-trend (~> 0.4)
18+
rspec (>= 3.0)
19+
rspec-core (3.13.3)
20+
rspec-support (~> 3.13.0)
21+
rspec-expectations (3.13.3)
22+
diff-lcs (>= 1.2.0, < 2.0)
23+
rspec-support (~> 3.13.0)
24+
rspec-mocks (3.13.2)
25+
diff-lcs (>= 1.2.0, < 2.0)
26+
rspec-support (~> 3.13.0)
27+
rspec-support (3.13.2)
628
ruby-prof (1.6.3)
7-
ruby-progressbar (1.13.0)
829
stackprof (0.2.27)
930

1031
PLATFORMS
@@ -14,8 +35,8 @@ PLATFORMS
1435
DEPENDENCIES
1536
memory_profiler
1637
minitest
38+
rspec-benchmark
1739
ruby-prof
18-
ruby-progressbar
1940
stackprof
2041

2142
BUNDLED WITH

memory_watcher.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ def start
88
@thread = Thread.new do
99
until @should_stop
1010
current_memory = `ps -o rss= -p #{Process.pid}`.to_i / 1024
11+
puts "MEMORY USAGE: #{current_memory} MB"
1112
if current_memory > @memory_limit_mb
1213
puts "Memory limit exceeded: #{current_memory}MB > #{@memory_limit_mb}MB"
1314
puts "Killing process..."

task-2.rb

Lines changed: 71 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -4,154 +4,92 @@
44
require 'date'
55
require_relative 'memory_watcher'
66

7-
class User
8-
attr_reader :attributes, :sessions
9-
10-
def initialize(attributes:, sessions:)
11-
@attributes = attributes
12-
@sessions = sessions
13-
end
14-
end
15-
16-
def parse_user(user)
17-
fields = user.split(',')
18-
parsed_result = {
19-
'id' => fields[1],
20-
'first_name' => fields[2],
21-
'last_name' => fields[3],
22-
'age' => fields[4],
23-
}
24-
end
25-
26-
def parse_session(session)
27-
fields = session.split(',')
28-
parsed_result = {
29-
'user_id' => fields[1],
30-
'session_id' => fields[2],
31-
'browser' => fields[3],
32-
'time' => fields[4],
33-
'date' => fields[5],
34-
}
35-
end
36-
37-
def collect_stats_from_users(report, users_objects, &block)
38-
users_objects.each do |user|
39-
user_key = "#{user.attributes['first_name']}" + ' ' + "#{user.attributes['last_name']}"
40-
report['usersStats'][user_key] ||= {}
41-
report['usersStats'][user_key] = report['usersStats'][user_key].merge!(block.call(user))
42-
end
7+
require 'json' # needed for #to_json below; a no-op when json is already loaded

# Writes the statistics fields of one user to +file+ and closes that user's
# JSON object.
#
# Only the object's members and the closing brace are emitted; the caller is
# expected to have written the opening part (`"<user name>": {`) beforehand.
# Every value is serialized with #to_json so that quotes, backslashes and
# control characters in browser names or dates cannot break the document
# (raw interpolation and Array#to_s do not produce JSON-safe text).
#
# @param file [#write] destination stream (e.g. an open File or StringIO)
# @param user_sessions_count [Integer] number of sessions of the user
# @param user_total_time [Integer] sum of all session durations
# @param user_longest_session [Integer] duration of the longest session
# @param user_browsers [Array<String>] browser of every session (duplicates kept)
# @param user_dates [Array<String>] date of every session
# @param ie [Boolean] value written under "usedIE"
# @param chrome [Boolean] value written under "alwaysUsedChrome"
# @return [Object] whatever +file.write+ returns
def write_sessions(file, user_sessions_count, user_total_time, user_longest_session,
                   user_browsers, user_dates, ie, chrome)
  # Insertion order of the hash defines the order of the keys in the output.
  fields = {
    'sessionsCount' => user_sessions_count,
    'totalTime' => "#{user_total_time} min.",
    'longestSession' => "#{user_longest_session} min.",
    'browsers' => user_browsers.sort.join(', '),
    'usedIE' => ie,
    'alwaysUsedChrome' => chrome,
    # Descending order: sort ascending, then reverse.
    'dates' => user_dates.sort.reverse
  }

  members = fields.map { |key, value| "#{key.to_json}: #{value.to_json}" }

  # The trailing "}" terminates the per-user object opened by the caller.
  file.write("#{members.join(",\n")}\n}\n")
end
4419

45-
def work(file_path = 'data.txt', disable_gc = false, memory_watcher = false)
20+
def work(file_path = 'data.txt', memory_watcher = false)
4621
if memory_watcher
4722
memory_watcher = MemoryWatcher.new(70)
4823
memory_watcher.start
4924
end
5025

51-
GC.disable if disable_gc
26+
all_browsers = Set.new
27+
total_users = 0
28+
total_sessions = 0
5229

53-
file_lines = File.read(file_path).split("\n")
30+
first_user = true
31+
user_sessions_count = 0
32+
user_total_time = 0
33+
user_longest_session = 0
34+
user_browsers = []
35+
ie = false
36+
chrome = true
37+
user_dates = []
5438

55-
users = []
56-
sessions = []
5739
begin
58-
file_lines.each do |line|
59-
case
60-
when line.start_with?('user,')
61-
users << parse_user(line)
62-
when line.start_with?('session,')
63-
sessions << parse_session(line)
40+
File.open("result.json", 'w') do |file|
41+
file.write("{ \"usersStats\":{")
42+
43+
File.foreach(file_path, chomp: true).each do |line|
44+
record_type, _, user_name_or_session_id, user_second_name_or_browser_name, session_time, browser_date = line.split(',')
45+
if record_type == 'user'
46+
unless first_user
47+
write_sessions(file, user_sessions_count, user_total_time, user_longest_session, user_browsers, user_dates, ie, chrome)
48+
file.write ','
49+
end
50+
51+
file.write "\"#{user_name_or_session_id} #{user_second_name_or_browser_name}\": {"
52+
total_users += 1
53+
first_user = false
54+
user_total_time = 0
55+
user_longest_session = 0
56+
user_browsers = []
57+
user_dates = []
58+
ie = false
59+
chrome = true
60+
user_sessions_count = 0
61+
elsif record_type == "session"
62+
user_session_time = session_time.to_i
63+
user_total_time += user_session_time
64+
65+
user_longest_session = user_session_time if user_session_time > user_longest_session
66+
67+
user_browsers << user_second_name_or_browser_name.upcase!
68+
69+
unless ie
70+
ie = true if user_second_name_or_browser_name =~ /INTERNET EXPLORER/
71+
end
72+
if chrome
73+
chrome = false unless user_second_name_or_browser_name =~ /CHROME/
74+
end
75+
76+
user_dates << browser_date
77+
user_sessions_count += 1
78+
total_sessions += 1
79+
all_browsers.add(user_second_name_or_browser_name)
80+
end
6481
end
65-
end
66-
67-
# Отчёт в json
68-
# - Сколько всего юзеров +
69-
# - Сколько всего уникальных браузеров +
70-
# - Сколько всего сессий +
71-
# - Перечислить уникальные браузеры в алфавитном порядке через запятую и капсом +
72-
#
73-
# - По каждому пользователю
74-
# - сколько всего сессий +
75-
# - сколько всего времени +
76-
# - самая длинная сессия +
77-
# - браузеры через запятую +
78-
# - Хоть раз использовал IE? +
79-
# - Всегда использовал только Хром? +
80-
# - даты сессий в порядке убывания через запятую +
81-
82-
report = {}
83-
84-
report[:totalUsers] = users.count
85-
86-
# Подсчёт количества уникальных браузеров
87-
uniqueBrowsers = []
88-
sessions.each do |session|
89-
browser = session['browser']
90-
uniqueBrowsers += [browser] if uniqueBrowsers.all? { |b| b != browser }
91-
end
92-
93-
report['uniqueBrowsersCount'] = uniqueBrowsers.count
94-
95-
report['totalSessions'] = sessions.count
9682

97-
report['allBrowsers'] =
98-
sessions
99-
.map { |s| s['browser'] }
100-
.map { |b| b.upcase }
101-
.sort
102-
.uniq
103-
.join(',')
104-
105-
# Статистика по пользователям
106-
users_objects = []
107-
108-
sessions_by_user = sessions.group_by { |session| session['user_id'] }
109-
110-
users.each do |user|
111-
attributes = user
112-
user_sessions = sessions_by_user[user['id']] || []
113-
user_object = User.new(attributes: attributes, sessions: user_sessions)
114-
users_objects << user_object
115-
end
116-
117-
report['usersStats'] = {}
118-
119-
# Собираем количество сессий по пользователям
120-
collect_stats_from_users(report, users_objects) do |user|
121-
{ 'sessionsCount' => user.sessions.count }
122-
end
123-
124-
# Собираем количество времени по пользователям
125-
collect_stats_from_users(report, users_objects) do |user|
126-
{ 'totalTime' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.sum.to_s + ' min.' }
127-
end
128-
129-
# Выбираем самую длинную сессию пользователя
130-
collect_stats_from_users(report, users_objects) do |user|
131-
{ 'longestSession' => user.sessions.map {|s| s['time']}.map {|t| t.to_i}.max.to_s + ' min.' }
132-
end
133-
134-
# Браузеры пользователя через запятую
135-
collect_stats_from_users(report, users_objects) do |user|
136-
{ 'browsers' => user.sessions.map {|s| s['browser']}.map {|b| b.upcase}.sort.join(', ') }
137-
end
138-
139-
# Хоть раз использовал IE?
140-
collect_stats_from_users(report, users_objects) do |user|
141-
{ 'usedIE' => user.sessions.map{|s| s['browser']}.any? { |b| b.upcase =~ /INTERNET EXPLORER/ } }
142-
end
143-
144-
# Всегда использовал только Chrome?
145-
collect_stats_from_users(report, users_objects) do |user|
146-
{ 'alwaysUsedChrome' => user.sessions.map{|s| s['browser']}.all? { |b| b.upcase =~ /CHROME/ } }
147-
end
83+
write_sessions(file, user_sessions_count, user_total_time, user_longest_session, user_browsers, user_dates, ie, chrome)
14884

149-
# Даты сессий через запятую в обратном порядке в формате iso8601
150-
collect_stats_from_users(report, users_objects) do |user|
151-
{ 'dates' => user.sessions.map{|s| s['date']}.sort.reverse }
85+
file.write("},")
86+
file.write "\"uniqueBrowsersCount\": #{all_browsers.count},"
87+
file.write "\"totalSessions\": #{total_sessions},"
88+
file.write "\"allBrowsers\": \"#{all_browsers.sort.join(',')}\","
89+
file.write "\"totalUsers\": #{total_users}"
90+
file.write("}")
15291
end
15392

154-
File.write('result.json', "#{report.to_json}\n")
15593
puts "MEMORY USAGE: %d MB" % (`ps -o rss= -p #{Process.pid}`.to_i / 1024)
15694
ensure
15795
memory_watcher.stop if memory_watcher

0 commit comments

Comments
 (0)