root/trunk/bin/maint/stats.pl @ 919

Revision 919, 12.2 KB (checked in by bradfitz, 11 years ago)

promos? hah. old-skool. bye. that was from before we ever had a business model.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/bin/perl
2#
3
4use strict;
5use vars qw($dbh %maint);
6
7$maint{'genstats'} = sub
8{
9    my @which = @_;
10
11    unless (@which) { @which = qw(usage users countries
12                                  states gender clients
13                                  pop_interests meme pop_faq); }
14    my %do = map { $_, 1, } @which;
15   
16    my $dbs = LJ::get_dbs();
17    my $dbh = $dbs->{'dbh'};
18    my $dbr = $dbs->{'reader'};
19    my $sth;
20
21    my %account;
22    my %userinfo;
23    my %age;
24    my %newbyday;
25    my %gender;
26    my %stateus;
27    my %country;
28    my %pop_interests;
29    my ($nowtime, $time, $nowdate);
30
31    my %to_pop;
32
33    if ($do{'pop_faq'}) {
34        $sth = $dbr->prepare("SELECT faqid, COUNT(*) FROM faquses WHERE ".
35                             "faqid<>0 GROUP BY 1 ORDER BY 2 DESC LIMIT 50");
36        $sth->execute;
37        my $d = $to_pop{'popfaq'} = {};
38        while (my ($id, $count) = $sth->fetchrow_array) {
39            $d->{$id} = $count;
40        }
41    }
42
43    if ($do{'meme'}) {
44        $sth = $dbr->prepare("SELECT url, count(*) FROM meme GROUP BY 1 ORDER BY 2 DESC LIMIT 100");
45        $sth->execute;
46        my $memedata = $to_pop{'popmeme'} = {};
47        while (my ($url, $count) = $sth->fetchrow_array) {
48            $memedata->{$url} = $count;
49        }
50    }
51
52    if ($do{'usage'})
53    {
54        print "-I- Getting usage by day in last month...\n";
55        $sth = $dbr->prepare("SELECT UNIX_TIMESTAMP(), DATE_FORMAT(NOW(), '%Y-%m-%d')");
56        $sth->execute;
57        ($nowtime, $nowdate) = $sth->fetchrow_array;
58       
59        print "Date is: $nowdate\n";
60       
61        for (my $days_back = 30; $days_back > 0; $days_back--) {
62            print "  going back $days_back days... ";
63            $time = $nowtime - 86400*$days_back;
64            my ($year, $month, $day) = (localtime($time))[5, 4, 3];
65            $year += 1900;
66            $month += 1;
67            my $date = sprintf("%04d-%02d-%02d", $year, $month, $day);
68            my $qdate = $dbr->quote($date);
69            my $exist = $dbr->selectrow_array("SELECT COUNT(*) FROM stats WHERE statcat='postsbyday' AND statkey=$qdate");
70            if ($exist) {
71                print "exists.\n";
72                next;
73            }
74
75            my $total = 0;
76            $total += $dbr->selectrow_array("SELECT COUNT(*) FROM log WHERE year=$year ".
77                                            "AND month=$month AND day=$day");
78            foreach my $c (@LJ::CLUSTERS) {
79                my $dbcr = LJ::get_cluster_reader($c);
80                $total += $dbcr->selectrow_array("SELECT COUNT(*) FROM log2 WHERE year=$year ".
81                                                 "AND month=$month AND day=$day");
82            }
83           
84            print "$date = $total entries\n";
85            $dbh->do("REPLACE INTO stats (statcat, statkey, statval) ".
86                     "VALUES ('postsbyday', $qdate, $total)");
87        }
88    }
89
90    if ($do{'users'})
91    {
92        $to_pop{'userinfo'} = \%userinfo;
93        $to_pop{'account'} = \%account;
94        $to_pop{'age'} = \%age;
95        $to_pop{'newbyday'} = \%newbyday;
96
97        print "-I- Getting user stats...\n";
98
99        my $now = time();
100        my $count;
101        $sth = $dbr->prepare("SELECT COUNT(*) FROM user");
102        $sth->execute;
103        my ($usertotal) = $sth->fetchrow_array;
104        my $pagesize = 1000;
105        my $pages = int($usertotal / $pagesize) + (($usertotal % $pagesize) ? 1 : 0);
106       
107        for (my $page=0; $page < $pages; $page++)
108        {
109            my $skip = $page*$pagesize;
110            my $first = $skip+1;
111            my $last = $skip+$pagesize;
112            print "  getting records $first-$last...\n";
113            $sth = $dbr->prepare("SELECT DATE_FORMAT(uu.timecreate, '%Y-%m-%d') AS 'datereg', u.user, u.caps, FLOOR((TO_DAYS(NOW())-TO_DAYS(u.bdate))/365.25) AS 'age', UNIX_TIMESTAMP(uu.timeupdate) AS 'timeupdate', u.status, u.allow_getljnews FROM user u, userusage uu WHERE u.userid=uu.userid LIMIT $skip,$pagesize");
114            $sth->execute;
115            while (my $rec = $sth->fetchrow_hashref)
116            {
117                my $co = $rec->{'country'};
118                if ($co) {
119                    $country{$co}++;
120                    if ($co eq "US" && $rec->{'state'}) {
121                        $stateus{$rec->{'state'}}++;
122                    }
123                }
124               
125                my $capnameshort = LJ::name_caps_short($rec->{'caps'});
126                $account{$capnameshort}++;
127               
128                unless ($rec->{'datereg'} eq $nowdate) {
129                    $newbyday{$rec->{'datereg'}}++;
130                }
131               
132                if ($rec->{'age'} > 4 && $rec->{'age'} < 110) {
133                    $age{$rec->{'age'}}++;
134                }
135               
136                $userinfo{'total'}++;
137                $time = $rec->{'timeupdate'};
138                $userinfo{'updated'}++ if ($time);
139                $userinfo{'updated_last30'}++ if ($time > $now-60*60*24*30);
140                $userinfo{'updated_last7'}++ if ($time > $now-60*60*24*7);
141                $userinfo{'updated_last1'}++ if ($time > $now-60*60*24*1);
142               
143                if ($rec->{'status'} eq "A")
144                {
145                    for (qw(allow_getljnews))
146                    {
147                        $userinfo{$_}++ if ($rec->{$_} eq "Y");
148                    }
149                }
150               
151            }
152        }
153    }
154
155    if ($do{'countries'})
156    {
157        $to_pop{'country'} = \%country;
158
159        print "-I- Countries.\n";
160        $sth = $dbr->prepare("SELECT value, COUNT(*) AS 'count' FROM userprop WHERE upropid=3 AND value<>'' GROUP BY 1 ORDER BY 2");
161        $sth->execute;
162        while ($_ = $sth->fetchrow_hashref) {
163            $country{$_->{'value'}} = $_->{'count'};
164        }
165    }
166
167    if ($do{'states'})
168    {
169        $to_pop{'stateus'} = \%stateus;
170
171        print "-I- US States.\n";
172        $sth = $dbr->prepare("SELECT ua.value, COUNT(*) AS 'count' FROM userprop ua, userprop ub WHERE ua.userid=ub.userid AND ua.upropid=4 and ub.upropid=3 and ub.value='US' AND ub.value<>'' GROUP BY 1 ORDER BY 2");
173        $sth->execute;
174        while ($_ = $sth->fetchrow_hashref) {
175            $stateus{$_->{'value'}} = $_->{'count'};
176        }
177    }
178
179    if ($do{'gender'})
180    {
181        $to_pop{'gender'} = \%gender;
182
183        print "-I- Gender.\n";
184        $sth = $dbr->prepare("SELECT up.value, COUNT(*) AS 'count' FROM userprop up, userproplist upl WHERE up.upropid=upl.upropid AND upl.name='gender' GROUP BY 1");
185        $sth->execute;
186        while ($_ = $sth->fetchrow_hashref) {
187            $gender{$_->{'value'}} = $_->{'count'};
188        }
189    }
190
191    if ($do{'pop_interests'})
192    {
193        $to_pop{'pop_interests'} = \%pop_interests;
194
195        print "-I- Interests.\n";
196        $sth = $dbr->prepare("SELECT interest, intcount FROM interests WHERE intcount>2 ORDER BY intcount DESC, interest ASC LIMIT 400");
197        $sth->execute;
198        while (my ($int, $count) = $sth->fetchrow_array) {
199            $pop_interests{$int} = $count;
200        }
201    }
202   
203    foreach my $cat (keys %to_pop)
204    {
205        print "  dumping $cat stats\n";
206        my $qcat = $dbh->quote($cat);
207        $dbh->do("DELETE FROM stats WHERE statcat=$qcat");
208        if ($dbh->err) { die $dbh->errstr; }
209        foreach (sort keys %{$to_pop{$cat}}) {
210            my $qkey = $dbh->quote($_);
211            my $qval = $to_pop{$cat}->{$_}+0;
212            $dbh->do("REPLACE INTO stats (statcat, statkey, statval) VALUES ($qcat, $qkey, $qval)");
213            if ($dbh->err) { die $dbh->errstr; }
214        }
215    }
216
217    #### client usage stats
218
219    if ($do{'clients'})
220    {
221        print "-I- Clients.\n";
222        $sth = $dbr->prepare("SELECT c.client, COUNT(*) AS 'count' FROM clients c, clientusage cu ".
223                             "WHERE c.clientid=cu.clientid AND cu.lastlogin > ".
224                             "DATE_SUB(NOW(), INTERVAL 30 DAY) GROUP BY 1 ORDER BY 2");
225        $sth->execute;
226
227        $dbh->do("DELETE FROM stats WHERE statcat='client'");
228        while ($_ = $sth->fetchrow_hashref) {
229            my $qkey = $dbh->quote($_->{'client'});
230            my $qval = $_->{'count'}+0;
231            $dbh->do("REPLACE INTO stats (statcat, statkey, statval) VALUES ('client', $qkey, $qval)");
232        }
233    }
234
235    #### dump to text file
236    print "-I- Dumping to a text file.\n";
237
238    $sth = $dbh->prepare("SELECT statcat, statkey, statval FROM stats ORDER BY 1, 2");
239    $sth->execute;
240    open (OUT, ">$LJ::HTDOCS/stats/stats.txt");
241    while (@_ = $sth->fetchrow_array) {
242        print OUT join("\t", @_), "\n";
243    }
244    close OUT;
245
246    #### do stat box stuff
247    print "-I- Preparing stat box overviews.\n";
248    my %statbox;
249    my $v;
250
251    ## total users
252    $sth = $dbh->prepare("SELECT statval FROM stats WHERE statcat='userinfo' AND statkey='total'");
253    $sth->execute;
254    ($v) = $sth->fetchrow_array;
255    $statbox{'totusers'} = $v;
256   
257    ## how many posts yesterday
258    $sth = $dbh->prepare("SELECT statval FROM stats WHERE statcat='postsbyday' ORDER BY statkey DESC LIMIT 1");
259    $sth->execute;
260    ($v) = $sth->fetchrow_array;
261    $statbox{'postyester'} = $v;
262
263    foreach my $k (keys %statbox) {
264        my $qk = $dbh->quote($k);
265        my $qv = $dbh->quote($statbox{$k});
266        $dbh->do("REPLACE INTO stats (statcat, statkey, statval) VALUES ('statbox', $qk, $qv)");
267    }
268
269    print "-I- Done.\n";
270
271};
272
273$maint{'genstats_weekly'} = sub
274{
275    my $dbh = LJ::get_dbh("master");
276
277    my ($sth);
278    my %supportrank;
279
280    print "-I- Support rank.\n";
281    $sth = $dbh->prepare("SELECT u.userid, SUM(sp.points) AS 'points' FROM user u, supportpoints sp WHERE u.userid=sp.userid GROUP BY 1 ORDER BY 2 DESC");
282    my $rank = 0;
283    my $lastpoints = 0;
284    my $buildup = 0;
285    $sth->execute;
286    {
287        while ($_ = $sth->fetchrow_hashref)
288        {
289            if ($lastpoints != $_->{'points'}) {
290                $lastpoints = $_->{'points'};
291                $rank += (1 + $buildup);
292                $buildup = 0;
293            } else {
294                $buildup++;
295            }
296            $supportrank{$_->{'userid'}} = $rank;
297        }
298    }
299
300    $dbh->do("DELETE FROM stats WHERE statcat='supportrank_prev'");
301    $dbh->do("UPDATE stats SET statcat='supportrank_prev' WHERE statcat='supportrank'");
302
303    my %to_pop = (
304                  "supportrank" => \%supportrank,
305                  );
306   
307    foreach my $cat (keys %to_pop)
308    {
309        print "  dumping $cat stats\n";
310        my $qcat = $dbh->quote($cat);
311        $dbh->do("DELETE FROM stats WHERE statcat=$qcat");
312        if ($dbh->err) { die $dbh->errstr; }
313        foreach (sort keys %{$to_pop{$cat}}) {
314            my $qkey = $dbh->quote($_);
315            my $qval = $to_pop{$cat}->{$_}+0;
316            $dbh->do("REPLACE INTO stats (statcat, statkey, statval) VALUES ($qcat, $qkey, $qval)");
317            if ($dbh->err) { die $dbh->errstr; }
318        }
319    }
320
321};
322
323$maint{'build_randomuserset'} = sub
324{
325    ## this sets up the randomuserset table daily (or whenever) that htdocs/random.bml uses to
326    ## find a random user that is both 1) publicly listed in the directory, and 2) updated
327    ## within the past 24 hours.
328
329    ## note that if a user changes their privacy setting to not be in the database, it'll take
330    ## up to 24 hours for them to be removed from the random.bml listing, but that's acceptable.
331
332    my $dbh = LJ::get_dbh("master");
333
334    print "-I- Building randomuserset.\n";
335    $dbh->do("TRUNCATE TABLE randomuserset");
336    $dbh->do("REPLACE INTO randomuserset (userid) SELECT uu.userid FROM userusage uu, user u WHERE u.userid=uu.userid AND u.allow_infoshow='Y' AND uu.timeupdate > DATE_SUB(NOW(), INTERVAL 1 DAY)");
337    my $num = $dbh->selectrow_array("SELECT MAX(rid) FROM randomuserset");
338    $dbh->do("REPLACE INTO stats (statcat, statkey, statval) VALUES ('userinfo', 'randomcount', $num)");
339    print "-I- Done.\n";
340};
341
342$maint{'memeclean'} = sub
343{
344    my $dbh = LJ::get_dbh("master");
345    my $sth;
346
347    $sth = $dbh->prepare("SELECT statkey FROM stats WHERE statcat='popmeme'");
348    $sth->execute;
349    while (my $url = $sth->fetchrow_array)
350    {
351        my $copy = $url;
352        LJ::run_hooks("canonicalize_url", \$copy);
353        unless ($copy) {
354            my $d = $dbh->quote($url);
355            $dbh->do("DELETE FROM stats WHERE statcat='popmeme' AND statkey=$d");
356            print "Deleting: $url\n";
357        }
358    }
359    print "done.\n";
360};
361
3621;
Note: See TracBrowser for help on using the browser.